Blame - src/Shader/ShaderCore.cpp - platform/external/swiftshader

blob: 883131c9f93d0cd7728f5bfbf3cf431b627c0793 [file] [log] [blame]

Nicolas Capens	0bac285	2016-05-07 06:09:58 -0400	[diff] [blame]	1	// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	2	//
Nicolas Capens	0bac285	2016-05-07 06:09:58 -0400	[diff] [blame]	3	// Licensed under the Apache License, Version 2.0 (the "License");
				4	// you may not use this file except in compliance with the License.
				5	// You may obtain a copy of the License at
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	6	//
Nicolas Capens	0bac285	2016-05-07 06:09:58 -0400	[diff] [blame]	7	// http://www.apache.org/licenses/LICENSE-2.0
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	8	//
Nicolas Capens	0bac285	2016-05-07 06:09:58 -0400	[diff] [blame]	9	// Unless required by applicable law or agreed to in writing, software
				10	// distributed under the License is distributed on an "AS IS" BASIS,
				11	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				12	// See the License for the specific language governing permissions and
				13	// limitations under the License.
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	14
				15	#include "ShaderCore.hpp"
				16
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	17	#include "Renderer/Renderer.hpp"
				18	#include "Common/Debug.hpp"
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	19
Alexis Hetu	d5c31da	2015-08-28 14:39:13 -0400	[diff] [blame]	20	#include <limits.h>
				21
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	22	namespace sw
				23	{
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	24	extern TranscendentalPrecision logPrecision;
				25	extern TranscendentalPrecision expPrecision;
				26	extern TranscendentalPrecision rcpPrecision;
				27	extern TranscendentalPrecision rsqPrecision;
				28
Alexis Hetu	9651718	2015-04-15 10:30:23 -0400	[diff] [blame]	29	Vector4s::Vector4s()
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	30	{
				31	}
				32
Alexis Hetu	9651718	2015-04-15 10:30:23 -0400	[diff] [blame]	33	Vector4s::Vector4s(unsigned short x, unsigned short y, unsigned short z, unsigned short w)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	34	{
				35	this->x = Short4(x);
				36	this->y = Short4(y);
				37	this->z = Short4(z);
				38	this->w = Short4(w);
				39	}
				40
Alexis Hetu	9651718	2015-04-15 10:30:23 -0400	[diff] [blame]	41	Vector4s::Vector4s(const Vector4s &rhs)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	42	{
				43	x = rhs.x;
				44	y = rhs.y;
				45	z = rhs.z;
				46	w = rhs.w;
				47	}
				48
Alexis Hetu	9651718	2015-04-15 10:30:23 -0400	[diff] [blame]	49	Vector4s &Vector4s::operator=(const Vector4s &rhs)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	50	{
				51	x = rhs.x;
				52	y = rhs.y;
				53	z = rhs.z;
				54	w = rhs.w;
				55
				56	return *this;
				57	}
				58
Alexis Hetu	9651718	2015-04-15 10:30:23 -0400	[diff] [blame]	59	Short4 &Vector4s::operator[](int i)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	60	{
				61	switch(i)
				62	{
				63	case 0: return x;
				64	case 1: return y;
				65	case 2: return z;
				66	case 3: return w;
				67	}
				68
				69	return x;
				70	}
				71
				72	Vector4f::Vector4f()
				73	{
				74	}
				75
				76	Vector4f::Vector4f(float x, float y, float z, float w)
				77	{
				78	this->x = Float4(x);
				79	this->y = Float4(y);
				80	this->z = Float4(z);
				81	this->w = Float4(w);
				82	}
				83
				84	Vector4f::Vector4f(const Vector4f &rhs)
				85	{
				86	x = rhs.x;
				87	y = rhs.y;
				88	z = rhs.z;
				89	w = rhs.w;
				90	}
				91
				92	Vector4f &Vector4f::operator=(const Vector4f &rhs)
				93	{
				94	x = rhs.x;
				95	y = rhs.y;
				96	z = rhs.z;
				97	w = rhs.w;
				98
				99	return *this;
				100	}
				101
				102	Float4 &Vector4f::operator[](int i)
				103	{
				104	switch(i)
				105	{
				106	case 0: return x;
				107	case 1: return y;
				108	case 2: return z;
				109	case 3: return w;
				110	}
				111
				112	return x;
				113	}
				114
				115	Float4 exponential2(RValue<Float4> x, bool pp)
				116	{
Nicolas Capens	41bcdc7	2018-01-11 21:19:34 -0500	[diff] [blame^]	117	// This implementation is based on 2^(i + f) = 2^i * 2^f,
				118	// where i is the integer part of x and f is the fraction.
Nicolas Capens	0bac285	2016-05-07 06:09:58 -0400	[diff] [blame]	119
Nicolas Capens	41bcdc7	2018-01-11 21:19:34 -0500	[diff] [blame^]	120	// For 2^i we can put the integer part directly in the exponent of
				121	// the IEEE-754 floating-point number. Clamp to prevent overflow
				122	// past the representation of infinity.
				123	Float4 x0 = x;
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	124	x0 = Min(x0, As<Float4>(Int4(0x43010000))); // 129.00000e+0f
				125	x0 = Max(x0, As<Float4>(Int4(0xC2FDFFFF))); // -126.99999e+0f
Nicolas Capens	0bac285	2016-05-07 06:09:58 -0400	[diff] [blame]	126
Nicolas Capens	41bcdc7	2018-01-11 21:19:34 -0500	[diff] [blame^]	127	Int4 i = RoundInt(x0 - Float4(0.5f));
				128	Float4 ii = As<Float4>((i + Int4(127)) << 23); // Add single-precision bias, and shift into exponent.
				129
				130	// For the fractional part use a polynomial
				131	// which approximates 2^f in the 0 to 1 range.
				132	Float4 f = x0 - Float4(i);
				133	Float4 ff = As<Float4>(Int4(0x3AF61905)); // 1.8775767e-3f
				134	ff = ff * f + As<Float4>(Int4(0x3C134806)); // 8.9893397e-3f
				135	ff = ff * f + As<Float4>(Int4(0x3D64AA23)); // 5.5826318e-2f
				136	ff = ff * f + As<Float4>(Int4(0x3E75EAD4)); // 2.4015361e-1f
				137	ff = ff * f + As<Float4>(Int4(0x3F31727B)); // 6.9315308e-1f
				138	ff = ff * f + Float4(1.0f);
				139
				140	return ii * ff;
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	141	}
				142
				143	Float4 logarithm2(RValue<Float4> x, bool absolute, bool pp)
				144	{
				145	Float4 x0;
				146	Float4 x1;
				147	Float4 x2;
				148	Float4 x3;
Nicolas Capens	0bac285	2016-05-07 06:09:58 -0400	[diff] [blame]	149
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	150	x0 = x;
Nicolas Capens	0bac285	2016-05-07 06:09:58 -0400	[diff] [blame]	151
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	152	x1 = As<Float4>(As<Int4>(x0) & Int4(0x7F800000));
				153	x1 = As<Float4>(As<UInt4>(x1) >> 8);
				154	x1 = As<Float4>(As<Int4>(x1) \| As<Int4>(Float4(1.0f)));
				155	x1 = (x1 - Float4(1.4960938f)) * Float4(256.0f); // FIXME: (x1 - 1.4960938f) * 256.0f;
				156	x0 = As<Float4>((As<Int4>(x0) & Int4(0x007FFFFF)) \| As<Int4>(Float4(1.0f)));
				157
				158	x2 = (Float4(9.5428179e-2f) * x0 + Float4(4.7779095e-1f)) * x0 + Float4(1.9782813e-1f);
				159	x3 = ((Float4(1.6618466e-2f) * x0 + Float4(2.0350508e-1f)) * x0 + Float4(2.7382900e-1f)) * x0 + Float4(4.0496687e-2f);
				160	x2 /= x3;
				161
				162	x1 += (x0 - Float4(1.0f)) * x2;
Nicolas Capens	0bac285	2016-05-07 06:09:58 -0400	[diff] [blame]	163
Alexis Hetu	0b7003b	2017-11-13 16:21:11 -0500	[diff] [blame]	164	Int4 pos_inf_x = CmpEQ(As<Int4>(x), Int4(0x7F800000));
				165	return As<Float4>((pos_inf_x & As<Int4>(x)) \| (~pos_inf_x & As<Int4>(x1)));
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	166	}
				167
				168	Float4 exponential(RValue<Float4> x, bool pp)
				169	{
				170	// FIXME: Propagate the constant
Alexis Hetu	0b7003b	2017-11-13 16:21:11 -0500	[diff] [blame]	171	return exponential2(Float4(1.44269504f) * x, pp); // 1/ln(2)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	172	}
				173
				174	Float4 logarithm(RValue<Float4> x, bool absolute, bool pp)
				175	{
				176	// FIXME: Propagate the constant
				177	return Float4(6.93147181e-1f) * logarithm2(x, absolute, pp); // ln(2)
				178	}
				179
				180	Float4 power(RValue<Float4> x, RValue<Float4> y, bool pp)
				181	{
				182	Float4 log = logarithm2(x, true, pp);
				183	log *= y;
				184	return exponential2(log, pp);
				185	}
				186
Nicolas Capens	05b3d66	2016-02-25 23:58:33 -0500	[diff] [blame]	187	Float4 reciprocal(RValue<Float4> x, bool pp, bool finite, bool exactAtPow2)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	188	{
				189	Float4 rcp;
				190
				191	if(!pp && rcpPrecision >= WHQL)
				192	{
				193	rcp = Float4(1.0f) / x;
				194	}
				195	else
				196	{
Nicolas Capens	05b3d66	2016-02-25 23:58:33 -0500	[diff] [blame]	197	rcp = Rcp_pp(x, exactAtPow2);
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	198
				199	if(!pp)
				200	{
				201	rcp = (rcp + rcp) - (x * rcp * rcp);
				202	}
				203	}
				204
				205	if(finite)
				206	{
				207	int big = 0x7F7FFFFF;
				208	rcp = Min(rcp, Float4((float&)big));
				209	}
				210
				211	return rcp;
				212	}
				213
				214	Float4 reciprocalSquareRoot(RValue<Float4> x, bool absolute, bool pp)
				215	{
				216	Float4 abs = x;
				217
				218	if(absolute)
				219	{
				220	abs = Abs(abs);
				221	}
				222
				223	Float4 rsq;
				224
Alexis Hetu	a0ef97a	2017-11-13 17:31:20 -0500	[diff] [blame]	225	if(!pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	226	{
				227	rsq = Float4(1.0f) / Sqrt(abs);
				228	}
				229	else
				230	{
				231	rsq = RcpSqrt_pp(abs);
				232
				233	if(!pp)
				234	{
				235	rsq = rsq * (Float4(3.0f) - rsq * rsq * abs) * Float4(0.5f);
				236	}
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	237
Alexis Hetu	a0ef97a	2017-11-13 17:31:20 -0500	[diff] [blame]	238	rsq = As<Float4>(CmpNEQ(As<Int4>(abs), Int4(0x7F800000)) & As<Int4>(rsq));
				239	}
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	240
				241	return rsq;
				242	}
				243
				244	Float4 modulo(RValue<Float4> x, RValue<Float4> y)
				245	{
				246	return x - y * Floor(x / y);
				247	}
				248
				249	Float4 sine_pi(RValue<Float4> x, bool pp)
				250	{
				251	const Float4 A = Float4(-4.05284734e-1f); // -4/pi^2
				252	const Float4 B = Float4(1.27323954e+0f); // 4/pi
				253	const Float4 C = Float4(7.75160950e-1f);
				254	const Float4 D = Float4(2.24839049e-1f);
				255
				256	// Parabola approximating sine
				257	Float4 sin = x * (Abs(x) * A + B);
				258
				259	// Improve precision from 0.06 to 0.001
				260	if(true)
				261	{
				262	sin = sin * (Abs(sin) * D + C);
				263	}
				264
				265	return sin;
				266	}
				267
				268	Float4 cosine_pi(RValue<Float4> x, bool pp)
				269	{
				270	// cos(x) = sin(x + pi/2)
				271	Float4 y = x + Float4(1.57079632e+0f);
Nicolas Capens	0bac285	2016-05-07 06:09:58 -0400	[diff] [blame]	272
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	273	// Wrap around
				274	y -= As<Float4>(CmpNLT(y, Float4(3.14159265e+0f)) & As<Int4>(Float4(6.28318530e+0f)));
				275
				276	return sine_pi(y, pp);
				277	}
				278
				279	Float4 sine(RValue<Float4> x, bool pp)
				280	{
				281	// Reduce to [-0.5, 0.5] range
				282	Float4 y = x * Float4(1.59154943e-1f); // 1/2pi
				283	y = y - Round(y);
				284
Alexis Hetu	929c6b0	2017-11-07 16:04:25 -0500	[diff] [blame]	285	if(!pp)
				286	{
				287	// From the paper: "A Fast, Vectorizable Algorithm for Producing Single-Precision Sine-Cosine Pairs"
				288	// This implementation passes OpenGL ES 3.0 precision requirements, at the cost of more operations:
				289	// !pp : 17 mul, 7 add, 1 sub, 1 reciprocal
				290	// pp : 4 mul, 2 add, 2 abs
				291
				292	Float4 y2 = y * y;
				293	Float4 c1 = y2 * (y2 * (y2 * Float4(-0.0204391631f) + Float4(0.2536086171f)) + Float4(-1.2336977925f)) + Float4(1.0f);
				294	Float4 s1 = y * (y2 * (y2 * (y2 * Float4(-0.0046075748f) + Float4(0.0796819754f)) + Float4(-0.645963615f)) + Float4(1.5707963235f));
				295	Float4 c2 = (c1 * c1) - (s1 * s1);
				296	Float4 s2 = Float4(2.0f) * s1 * c1;
				297	return Float4(2.0f) * s2 * c2 * reciprocal(s2 * s2 + c2 * c2, pp, true);
				298	}
				299
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	300	const Float4 A = Float4(-16.0f);
				301	const Float4 B = Float4(8.0f);
				302	const Float4 C = Float4(7.75160950e-1f);
				303	const Float4 D = Float4(2.24839049e-1f);
				304
				305	// Parabola approximating sine
				306	Float4 sin = y * (Abs(y) * A + B);
				307
				308	// Improve precision from 0.06 to 0.001
				309	if(true)
				310	{
				311	sin = sin * (Abs(sin) * D + C);
				312	}
				313
				314	return sin;
				315	}
				316
				317	Float4 cosine(RValue<Float4> x, bool pp)
				318	{
				319	// cos(x) = sin(x + pi/2)
				320	Float4 y = x + Float4(1.57079632e+0f);
				321	return sine(y, pp);
				322	}
				323
				324	Float4 tangent(RValue<Float4> x, bool pp)
				325	{
				326	return sine(x, pp) / cosine(x, pp);
				327	}
				328
				329	Float4 arccos(RValue<Float4> x, bool pp)
				330	{
				331	// pi/2 - arcsin(x)
				332	return Float4(1.57079632e+0f) - arcsin(x);
				333	}
				334
				335	Float4 arcsin(RValue<Float4> x, bool pp)
				336	{
Alexis Hetu	1728dde	2017-11-08 13:43:16 -0500	[diff] [blame]	337	if(false) // Simpler implementation fails even lowp precision tests
				338	{
				339	// x(pi/2-sqrt(1-xx)*pi/5)
				340	return x * (Float4(1.57079632e+0f) - Sqrt(Float4(1.0f) - xx) Float4(6.28318531e-1f));
				341	}
				342	else
				343	{
				344	// From 4.4.45, page 81 of the Handbook of Mathematical Functions, by Milton Abramowitz and Irene Stegun
				345	const Float4 half_pi(1.57079632f);
				346	const Float4 a0(1.5707288f);
				347	const Float4 a1(-0.2121144f);
				348	const Float4 a2(0.0742610f);
				349	const Float4 a3(-0.0187293f);
				350	Float4 absx = Abs(x);
				351	return As<Float4>(As<Int4>(half_pi - Sqrt(Float4(1.0f) - absx) * (a0 + absx * (a1 + absx * (a2 + absx * a3)))) ^
				352	(As<Int4>(x) & Int4(0x80000000)));
				353	}
				354	}
				355
				356	// Approximation of atan in [0..1]
				357	Float4 arctan_01(Float4 x, bool pp)
				358	{
				359	if(pp)
				360	{
				361	return x * (Float4(-0.27f) * x + Float4(1.05539816f));
				362	}
				363	else
				364	{
				365	// From 4.4.49, page 81 of the Handbook of Mathematical Functions, by Milton Abramowitz and Irene Stegun
				366	const Float4 a2(-0.3333314528f);
				367	const Float4 a4(0.1999355085f);
				368	const Float4 a6(-0.1420889944f);
				369	const Float4 a8(0.1065626393f);
				370	const Float4 a10(-0.0752896400f);
				371	const Float4 a12(0.0429096138f);
				372	const Float4 a14(-0.0161657367f);
				373	const Float4 a16(0.0028662257f);
				374	Float4 x2 = x * x;
				375	return (x + x * (x2 * (a2 + x2 * (a4 + x2 * (a6 + x2 * (a8 + x2 * (a10 + x2 * (a12 + x2 * (a14 + x2 * a16)))))))));
				376	}
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	377	}
				378
				379	Float4 arctan(RValue<Float4> x, bool pp)
				380	{
Alexis Hetu	1728dde	2017-11-08 13:43:16 -0500	[diff] [blame]	381	Float4 absx = Abs(x);
				382	Int4 O = CmpNLT(absx, Float4(1.0f));
				383	Float4 y = As<Float4>((O & As<Int4>(Float4(1.0f) / absx)) \| (~O & As<Int4>(absx))); // FIXME: Vector select
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	384
Alexis Hetu	1728dde	2017-11-08 13:43:16 -0500	[diff] [blame]	385	const Float4 half_pi(1.57079632f);
				386	Float4 theta = arctan_01(y, pp);
				387	return As<Float4>(((O & As<Int4>(half_pi - theta)) \| (~O & As<Int4>(theta))) ^ // FIXME: Vector select
				388	(As<Int4>(x) & Int4(0x80000000)));
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	389	}
				390
				391	Float4 arctan(RValue<Float4> y, RValue<Float4> x, bool pp)
				392	{
Alexis Hetu	1728dde	2017-11-08 13:43:16 -0500	[diff] [blame]	393	const Float4 pi(3.14159265f); // pi
				394	const Float4 minus_pi(-3.14159265f); // -pi
				395	const Float4 half_pi(1.57079632f); // pi/2
				396	const Float4 quarter_pi(7.85398163e-1f); // pi/4
				397
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	398	// Rotate to upper semicircle when in lower semicircle
				399	Int4 S = CmpLT(y, Float4(0.0f));
Alexis Hetu	1728dde	2017-11-08 13:43:16 -0500	[diff] [blame]	400	Float4 theta = As<Float4>(S & As<Int4>(minus_pi));
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	401	Float4 x0 = As<Float4>((As<Int4>(y) & Int4(0x80000000)) ^ As<Int4>(x));
				402	Float4 y0 = Abs(y);
				403
				404	// Rotate to right quadrant when in left quadrant
Alexis Hetu	1728dde	2017-11-08 13:43:16 -0500	[diff] [blame]	405	Int4 non_zero_y = CmpNEQ(y0, Float4(0.0f));
				406	Int4 Q = CmpLT(x0, Float4(0.0f)) & non_zero_y;
				407	theta += As<Float4>(Q & As<Int4>(half_pi));
				408	Float4 x1 = As<Float4>((Q & As<Int4>(y0)) \| (~Q & As<Int4>(x0))); // FIXME: Vector select
				409	Float4 y1 = As<Float4>((Q & As<Int4>(-x0)) \| (~Q & As<Int4>(y0))); // FIXME: Vector select
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	410
Alexis Hetu	1728dde	2017-11-08 13:43:16 -0500	[diff] [blame]	411	// Mirror to first octant when in second octant
				412	Int4 O = CmpNLT(y1, x1) & non_zero_y;
				413	Float4 x2 = As<Float4>((O & As<Int4>(y1)) \| (~O & As<Int4>(x1))); // FIXME: Vector select
				414	Float4 y2 = As<Float4>((O & As<Int4>(x1)) \| (~O & As<Int4>(y1))); // FIXME: Vector select
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	415
				416	// Approximation of atan in [0..1]
Alexis Hetu	1728dde	2017-11-08 13:43:16 -0500	[diff] [blame]	417	Int4 zero_x = CmpEQ(x2, Float4(0.0f));
				418	Int4 inf_y = IsInf(y2); // Since x2 >= y2, this means x2 == y2 == inf, so we use 45 degrees or pi/4
				419	Float4 atan2_theta = arctan_01(y2 / x2, pp);
				420	theta += As<Float4>((~zero_x & ~inf_y & non_zero_y & ((O & As<Int4>(half_pi - atan2_theta)) \| (~O & (As<Int4>(atan2_theta))))) \| // FIXME: Vector select
				421	(inf_y & As<Int4>(quarter_pi)));
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	422
Alexis Hetu	1728dde	2017-11-08 13:43:16 -0500	[diff] [blame]	423	// Recover loss of precision for tiny theta angles
				424	Int4 precision_loss = S & Q & O & ~inf_y; // This combination results in (-pi + half_pi + half_pi - atan2_theta) which is equivalent to -atan2_theta
				425	return As<Float4>((precision_loss & As<Int4>(-atan2_theta)) \| (~precision_loss & As<Int4>(theta))); // FIXME: Vector select
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	426	}
				427
Alexis Hetu	8b5f3ef	2015-04-16 11:33:34 -0400	[diff] [blame]	428	Float4 sineh(RValue<Float4> x, bool pp)
				429	{
				430	return (exponential(x, pp) - exponential(-x, pp)) * Float4(0.5f);
				431	}
				432
				433	Float4 cosineh(RValue<Float4> x, bool pp)
				434	{
				435	return (exponential(x, pp) + exponential(-x, pp)) * Float4(0.5f);
				436	}
				437
				438	Float4 tangenth(RValue<Float4> x, bool pp)
				439	{
				440	Float4 e_x = exponential(x, pp);
				441	Float4 e_minus_x = exponential(-x, pp);
				442	return (e_x - e_minus_x) / (e_x + e_minus_x);
				443	}
				444
				445	Float4 arccosh(RValue<Float4> x, bool pp)
				446	{
				447	return logarithm(x + Sqrt(x + Float4(1.0f)) * Sqrt(x - Float4(1.0f)), pp);
				448	}
				449
				450	Float4 arcsinh(RValue<Float4> x, bool pp)
				451	{
				452	return logarithm(x + Sqrt(x * x + Float4(1.0f)), pp);
				453	}
				454
				455	Float4 arctanh(RValue<Float4> x, bool pp)
				456	{
				457	return logarithm((Float4(1.0f) + x) / (Float4(1.0f) - x), pp) * Float4(0.5f);
				458	}
				459
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	460	Float4 dot2(const Vector4f &v0, const Vector4f &v1)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	461	{
				462	return v0.x * v1.x + v0.y * v1.y;
				463	}
				464
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	465	Float4 dot3(const Vector4f &v0, const Vector4f &v1)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	466	{
				467	return v0.x * v1.x + v0.y * v1.y + v0.z * v1.z;
				468	}
				469
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	470	Float4 dot4(const Vector4f &v0, const Vector4f &v1)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	471	{
				472	return v0.x * v1.x + v0.y * v1.y + v0.z * v1.z + v0.w * v1.w;
				473	}
				474
				475	void transpose4x4(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3)
				476	{
				477	Int2 tmp0 = UnpackHigh(row0, row1);
				478	Int2 tmp1 = UnpackHigh(row2, row3);
				479	Int2 tmp2 = UnpackLow(row0, row1);
				480	Int2 tmp3 = UnpackLow(row2, row3);
				481
Nicolas Capens	45f187a	2016-12-02 15:30:56 -0500	[diff] [blame]	482	row0 = UnpackLow(tmp2, tmp3);
				483	row1 = UnpackHigh(tmp2, tmp3);
				484	row2 = UnpackLow(tmp0, tmp1);
				485	row3 = UnpackHigh(tmp0, tmp1);
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	486	}
				487
Nicolas Capens	e4a88b9	2017-11-30 00:14:57 -0500	[diff] [blame]	488	void transpose4x3(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3)
				489	{
				490	Int2 tmp0 = UnpackHigh(row0, row1);
				491	Int2 tmp1 = UnpackHigh(row2, row3);
				492	Int2 tmp2 = UnpackLow(row0, row1);
				493	Int2 tmp3 = UnpackLow(row2, row3);
				494
				495	row0 = UnpackLow(tmp2, tmp3);
				496	row1 = UnpackHigh(tmp2, tmp3);
				497	row2 = UnpackLow(tmp0, tmp1);
				498	}
				499
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	500	void transpose4x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3)
				501	{
				502	Float4 tmp0 = UnpackLow(row0, row1);
				503	Float4 tmp1 = UnpackLow(row2, row3);
				504	Float4 tmp2 = UnpackHigh(row0, row1);
				505	Float4 tmp3 = UnpackHigh(row2, row3);
				506
				507	row0 = Float4(tmp0.xy, tmp1.xy);
				508	row1 = Float4(tmp0.zw, tmp1.zw);
				509	row2 = Float4(tmp2.xy, tmp3.xy);
				510	row3 = Float4(tmp2.zw, tmp3.zw);
				511	}
				512
				513	void transpose4x3(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3)
				514	{
				515	Float4 tmp0 = UnpackLow(row0, row1);
				516	Float4 tmp1 = UnpackLow(row2, row3);
				517	Float4 tmp2 = UnpackHigh(row0, row1);
				518	Float4 tmp3 = UnpackHigh(row2, row3);
				519
				520	row0 = Float4(tmp0.xy, tmp1.xy);
				521	row1 = Float4(tmp0.zw, tmp1.zw);
				522	row2 = Float4(tmp2.xy, tmp3.xy);
				523	}
				524
				525	void transpose4x2(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3)
				526	{
				527	Float4 tmp0 = UnpackLow(row0, row1);
				528	Float4 tmp1 = UnpackLow(row2, row3);
				529
				530	row0 = Float4(tmp0.xy, tmp1.xy);
				531	row1 = Float4(tmp0.zw, tmp1.zw);
				532	}
				533
				534	void transpose4x1(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3)
				535	{
				536	Float4 tmp0 = UnpackLow(row0, row1);
				537	Float4 tmp1 = UnpackLow(row2, row3);
				538
				539	row0 = Float4(tmp0.xy, tmp1.xy);
				540	}
				541
				542	void transpose2x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3)
				543	{
Nicolas Capens	54ac5e8	2016-12-09 14:07:50 -0500	[diff] [blame]	544	Float4 tmp01 = UnpackLow(row0, row1);
				545	Float4 tmp23 = UnpackHigh(row0, row1);
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	546
Nicolas Capens	54ac5e8	2016-12-09 14:07:50 -0500	[diff] [blame]	547	row0 = tmp01;
				548	row1 = Float4(tmp01.zw, row1.zw);
				549	row2 = tmp23;
				550	row3 = Float4(tmp23.zw, row3.zw);
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	551	}
				552
				553	void transpose4xN(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3, int N)
				554	{
				555	switch(N)
				556	{
				557	case 1: transpose4x1(row0, row1, row2, row3); break;
				558	case 2: transpose4x2(row0, row1, row2, row3); break;
				559	case 3: transpose4x3(row0, row1, row2, row3); break;
				560	case 4: transpose4x4(row0, row1, row2, row3); break;
				561	}
				562	}
				563
Alexis Hetu	02a2bb8	2015-08-20 14:10:33 -0400	[diff] [blame]	564	void ShaderCore::mov(Vector4f &dst, const Vector4f &src, bool integerDestination)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	565	{
Alexis Hetu	02a2bb8	2015-08-20 14:10:33 -0400	[diff] [blame]	566	if(integerDestination)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	567	{
Alexis Hetu	02a2bb8	2015-08-20 14:10:33 -0400	[diff] [blame]	568	dst.x = As<Float4>(RoundInt(src.x));
				569	dst.y = As<Float4>(RoundInt(src.y));
				570	dst.z = As<Float4>(RoundInt(src.z));
				571	dst.w = As<Float4>(RoundInt(src.w));
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	572	}
				573	else
				574	{
				575	dst = src;
				576	}
				577	}
				578
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	579	void ShaderCore::neg(Vector4f &dst, const Vector4f &src)
				580	{
				581	dst.x = -src.x;
				582	dst.y = -src.y;
				583	dst.z = -src.z;
				584	dst.w = -src.w;
				585	}
				586
				587	void ShaderCore::ineg(Vector4f &dst, const Vector4f &src)
				588	{
				589	dst.x = As<Float4>(-As<Int4>(src.x));
				590	dst.y = As<Float4>(-As<Int4>(src.y));
				591	dst.z = As<Float4>(-As<Int4>(src.z));
				592	dst.w = As<Float4>(-As<Int4>(src.w));
				593	}
				594
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	595	void ShaderCore::f2b(Vector4f &dst, const Vector4f &src)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	596	{
				597	dst.x = As<Float4>(CmpNEQ(src.x, Float4(0.0f)));
				598	dst.y = As<Float4>(CmpNEQ(src.y, Float4(0.0f)));
				599	dst.z = As<Float4>(CmpNEQ(src.z, Float4(0.0f)));
				600	dst.w = As<Float4>(CmpNEQ(src.w, Float4(0.0f)));
				601	}
				602
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	603	void ShaderCore::b2f(Vector4f &dst, const Vector4f &src)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	604	{
				605	dst.x = As<Float4>(As<Int4>(src.x) & As<Int4>(Float4(1.0f)));
				606	dst.y = As<Float4>(As<Int4>(src.y) & As<Int4>(Float4(1.0f)));
				607	dst.z = As<Float4>(As<Int4>(src.z) & As<Int4>(Float4(1.0f)));
				608	dst.w = As<Float4>(As<Int4>(src.w) & As<Int4>(Float4(1.0f)));
				609	}
				610
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	611	void ShaderCore::f2i(Vector4f &dst, const Vector4f &src)
				612	{
				613	dst.x = As<Float4>(Int4(src.x));
				614	dst.y = As<Float4>(Int4(src.y));
				615	dst.z = As<Float4>(Int4(src.z));
				616	dst.w = As<Float4>(Int4(src.w));
				617	}
				618
				619	void ShaderCore::i2f(Vector4f &dst, const Vector4f &src)
				620	{
				621	dst.x = Float4(As<Int4>(src.x));
				622	dst.y = Float4(As<Int4>(src.y));
				623	dst.z = Float4(As<Int4>(src.z));
				624	dst.w = Float4(As<Int4>(src.w));
				625	}
				626
				627	void ShaderCore::f2u(Vector4f &dst, const Vector4f &src)
				628	{
				629	dst.x = As<Float4>(UInt4(src.x));
				630	dst.y = As<Float4>(UInt4(src.y));
				631	dst.z = As<Float4>(UInt4(src.z));
				632	dst.w = As<Float4>(UInt4(src.w));
				633	}
				634
				635	void ShaderCore::u2f(Vector4f &dst, const Vector4f &src)
				636	{
				637	dst.x = Float4(As<UInt4>(src.x));
				638	dst.y = Float4(As<UInt4>(src.y));
				639	dst.z = Float4(As<UInt4>(src.z));
				640	dst.w = Float4(As<UInt4>(src.w));
				641	}
				642
				643	void ShaderCore::i2b(Vector4f &dst, const Vector4f &src)
				644	{
				645	dst.x = As<Float4>(CmpNEQ(As<Int4>(src.x), Int4(0)));
				646	dst.y = As<Float4>(CmpNEQ(As<Int4>(src.y), Int4(0)));
				647	dst.z = As<Float4>(CmpNEQ(As<Int4>(src.z), Int4(0)));
				648	dst.w = As<Float4>(CmpNEQ(As<Int4>(src.w), Int4(0)));
				649	}
				650
				651	void ShaderCore::b2i(Vector4f &dst, const Vector4f &src)
				652	{
				653	dst.x = As<Float4>(As<Int4>(src.x) & Int4(1));
				654	dst.y = As<Float4>(As<Int4>(src.y) & Int4(1));
				655	dst.z = As<Float4>(As<Int4>(src.z) & Int4(1));
				656	dst.w = As<Float4>(As<Int4>(src.w) & Int4(1));
				657	}
				658
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	659	void ShaderCore::add(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	660	{
				661	dst.x = src0.x + src1.x;
				662	dst.y = src0.y + src1.y;
				663	dst.z = src0.z + src1.z;
				664	dst.w = src0.w + src1.w;
				665	}
				666
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	667	void ShaderCore::iadd(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
				668	{
				669	dst.x = As<Float4>(As<Int4>(src0.x) + As<Int4>(src1.x));
				670	dst.y = As<Float4>(As<Int4>(src0.y) + As<Int4>(src1.y));
				671	dst.z = As<Float4>(As<Int4>(src0.z) + As<Int4>(src1.z));
				672	dst.w = As<Float4>(As<Int4>(src0.w) + As<Int4>(src1.w));
				673	}
				674
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	675	void ShaderCore::sub(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	676	{
				677	dst.x = src0.x - src1.x;
				678	dst.y = src0.y - src1.y;
				679	dst.z = src0.z - src1.z;
				680	dst.w = src0.w - src1.w;
				681	}
				682
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	683	void ShaderCore::isub(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
				684	{
				685	dst.x = As<Float4>(As<Int4>(src0.x) - As<Int4>(src1.x));
				686	dst.y = As<Float4>(As<Int4>(src0.y) - As<Int4>(src1.y));
				687	dst.z = As<Float4>(As<Int4>(src0.z) - As<Int4>(src1.z));
				688	dst.w = As<Float4>(As<Int4>(src0.w) - As<Int4>(src1.w));
				689	}
				690
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	691	void ShaderCore::mad(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	692	{
				693	dst.x = src0.x * src1.x + src2.x;
				694	dst.y = src0.y * src1.y + src2.y;
				695	dst.z = src0.z * src1.z + src2.z;
				696	dst.w = src0.w * src1.w + src2.w;
				697	}
				698
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	699	void ShaderCore::imad(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2)
				700	{
				701	dst.x = As<Float4>(As<Int4>(src0.x) * As<Int4>(src1.x) + As<Int4>(src2.x));
				702	dst.y = As<Float4>(As<Int4>(src0.y) * As<Int4>(src1.y) + As<Int4>(src2.y));
				703	dst.z = As<Float4>(As<Int4>(src0.z) * As<Int4>(src1.z) + As<Int4>(src2.z));
				704	dst.w = As<Float4>(As<Int4>(src0.w) * As<Int4>(src1.w) + As<Int4>(src2.w));
				705	}
				706
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	707	void ShaderCore::mul(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	708	{
				709	dst.x = src0.x * src1.x;
				710	dst.y = src0.y * src1.y;
				711	dst.z = src0.z * src1.z;
				712	dst.w = src0.w * src1.w;
				713	}
				714
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	715	void ShaderCore::imul(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
				716	{
				717	dst.x = As<Float4>(As<Int4>(src0.x) * As<Int4>(src1.x));
				718	dst.y = As<Float4>(As<Int4>(src0.y) * As<Int4>(src1.y));
				719	dst.z = As<Float4>(As<Int4>(src0.z) * As<Int4>(src1.z));
				720	dst.w = As<Float4>(As<Int4>(src0.w) * As<Int4>(src1.w));
				721	}
				722
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	723	void ShaderCore::rcpx(Vector4f &dst, const Vector4f &src, bool pp)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	724	{
Nicolas Capens	af13df4	2018-01-09 16:27:15 -0500	[diff] [blame]	725	Float4 rcp = reciprocal(src.x, pp, true, true);
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	726
				727	dst.x = rcp;
				728	dst.y = rcp;
				729	dst.z = rcp;
				730	dst.w = rcp;
				731	}
				732
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	733	void ShaderCore::div(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	734	{
				735	dst.x = src0.x / src1.x;
				736	dst.y = src0.y / src1.y;
				737	dst.z = src0.z / src1.z;
				738	dst.w = src0.w / src1.w;
				739	}
				740
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	741	void ShaderCore::idiv(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
				742	{
				743	Float4 intMax(As<Float4>(Int4(INT_MAX)));
				744	cmp0i(dst.x, src1.x, intMax, src1.x);
				745	dst.x = As<Float4>(As<Int4>(src0.x) / As<Int4>(dst.x));
				746	cmp0i(dst.y, src1.y, intMax, src1.y);
				747	dst.y = As<Float4>(As<Int4>(src0.y) / As<Int4>(dst.y));
				748	cmp0i(dst.z, src1.z, intMax, src1.z);
				749	dst.z = As<Float4>(As<Int4>(src0.z) / As<Int4>(dst.z));
				750	cmp0i(dst.w, src1.w, intMax, src1.w);
				751	dst.w = As<Float4>(As<Int4>(src0.w) / As<Int4>(dst.w));
				752	}
				753
				754	void ShaderCore::udiv(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
				755	{
				756	Float4 uintMax(As<Float4>(UInt4(UINT_MAX)));
				757	cmp0i(dst.x, src1.x, uintMax, src1.x);
				758	dst.x = As<Float4>(As<UInt4>(src0.x) / As<UInt4>(dst.x));
				759	cmp0i(dst.y, src1.y, uintMax, src1.y);
				760	dst.y = As<Float4>(As<UInt4>(src0.y) / As<UInt4>(dst.y));
				761	cmp0i(dst.z, src1.z, uintMax, src1.z);
				762	dst.z = As<Float4>(As<UInt4>(src0.z) / As<UInt4>(dst.z));
				763	cmp0i(dst.w, src1.w, uintMax, src1.w);
				764	dst.w = As<Float4>(As<UInt4>(src0.w) / As<UInt4>(dst.w));
				765	}
				766
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	767	void ShaderCore::mod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	768	{
				769	dst.x = modulo(src0.x, src1.x);
				770	dst.y = modulo(src0.y, src1.y);
				771	dst.z = modulo(src0.z, src1.z);
				772	dst.w = modulo(src0.w, src1.w);
				773	}
				774
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	775	void ShaderCore::imod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
				776	{
Alexis Hetu	2895810	2017-10-02 13:48:19 -0400	[diff] [blame]	777	Float4 intMax(As<Float4>(Int4(INT_MAX)));
				778	cmp0i(dst.x, src1.x, intMax, src1.x);
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	779	dst.x = As<Float4>(As<Int4>(src0.x) % As<Int4>(dst.x));
Alexis Hetu	2895810	2017-10-02 13:48:19 -0400	[diff] [blame]	780	cmp0i(dst.y, src1.y, intMax, src1.y);
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	781	dst.y = As<Float4>(As<Int4>(src0.y) % As<Int4>(dst.y));
Alexis Hetu	2895810	2017-10-02 13:48:19 -0400	[diff] [blame]	782	cmp0i(dst.z, src1.z, intMax, src1.z);
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	783	dst.z = As<Float4>(As<Int4>(src0.z) % As<Int4>(dst.z));
Alexis Hetu	2895810	2017-10-02 13:48:19 -0400	[diff] [blame]	784	cmp0i(dst.w, src1.w, intMax, src1.w);
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	785	dst.w = As<Float4>(As<Int4>(src0.w) % As<Int4>(dst.w));
				786	}
Alexis Hetu	2895810	2017-10-02 13:48:19 -0400	[diff] [blame]	787
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	788	void ShaderCore::umod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
				789	{
Alexis Hetu	2895810	2017-10-02 13:48:19 -0400	[diff] [blame]	790	Float4 uintMax(As<Float4>(UInt4(UINT_MAX)));
				791	cmp0i(dst.x, src1.x, uintMax, src1.x);
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	792	dst.x = As<Float4>(As<UInt4>(src0.x) % As<UInt4>(dst.x));
Alexis Hetu	2895810	2017-10-02 13:48:19 -0400	[diff] [blame]	793	cmp0i(dst.y, src1.y, uintMax, src1.y);
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	794	dst.y = As<Float4>(As<UInt4>(src0.y) % As<UInt4>(dst.y));
Alexis Hetu	2895810	2017-10-02 13:48:19 -0400	[diff] [blame]	795	cmp0i(dst.z, src1.z, uintMax, src1.z);
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	796	dst.z = As<Float4>(As<UInt4>(src0.z) % As<UInt4>(dst.z));
Alexis Hetu	2895810	2017-10-02 13:48:19 -0400	[diff] [blame]	797	cmp0i(dst.w, src1.w, uintMax, src1.w);
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	798	dst.w = As<Float4>(As<UInt4>(src0.w) % As<UInt4>(dst.w));
				799	}
				800
				801	void ShaderCore::shl(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
				802	{
				803	dst.x = As<Float4>(As<Int4>(src0.x) << As<Int4>(src1.x));
				804	dst.y = As<Float4>(As<Int4>(src0.y) << As<Int4>(src1.y));
				805	dst.z = As<Float4>(As<Int4>(src0.z) << As<Int4>(src1.z));
				806	dst.w = As<Float4>(As<Int4>(src0.w) << As<Int4>(src1.w));
				807	}
				808
				809	void ShaderCore::ishr(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
				810	{
				811	dst.x = As<Float4>(As<Int4>(src0.x) >> As<Int4>(src1.x));
				812	dst.y = As<Float4>(As<Int4>(src0.y) >> As<Int4>(src1.y));
				813	dst.z = As<Float4>(As<Int4>(src0.z) >> As<Int4>(src1.z));
				814	dst.w = As<Float4>(As<Int4>(src0.w) >> As<Int4>(src1.w));
				815	}
				816
				817	void ShaderCore::ushr(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
				818	{
				819	dst.x = As<Float4>(As<UInt4>(src0.x) >> As<UInt4>(src1.x));
				820	dst.y = As<Float4>(As<UInt4>(src0.y) >> As<UInt4>(src1.y));
				821	dst.z = As<Float4>(As<UInt4>(src0.z) >> As<UInt4>(src1.z));
				822	dst.w = As<Float4>(As<UInt4>(src0.w) >> As<UInt4>(src1.w));
				823	}
				824
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	825	void ShaderCore::rsqx(Vector4f &dst, const Vector4f &src, bool pp)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	826	{
				827	Float4 rsq = reciprocalSquareRoot(src.x, true, pp);
				828
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	829	dst.x = rsq;
				830	dst.y = rsq;
				831	dst.z = rsq;
				832	dst.w = rsq;
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	833	}
				834
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	835	void ShaderCore::sqrt(Vector4f &dst, const Vector4f &src, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	836	{
				837	dst.x = Sqrt(src.x);
				838	dst.y = Sqrt(src.y);
				839	dst.z = Sqrt(src.z);
				840	dst.w = Sqrt(src.w);
				841	}
				842
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	843	void ShaderCore::rsq(Vector4f &dst, const Vector4f &src, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	844	{
				845	dst.x = reciprocalSquareRoot(src.x, false, pp);
				846	dst.y = reciprocalSquareRoot(src.y, false, pp);
				847	dst.z = reciprocalSquareRoot(src.z, false, pp);
				848	dst.w = reciprocalSquareRoot(src.w, false, pp);
				849	}
				850
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	851	void ShaderCore::len2(Float4 &dst, const Vector4f &src, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	852	{
				853	dst = Sqrt(dot2(src, src));
				854	}
				855
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	856	void ShaderCore::len3(Float4 &dst, const Vector4f &src, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	857	{
				858	dst = Sqrt(dot3(src, src));
				859	}
				860
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	861	void ShaderCore::len4(Float4 &dst, const Vector4f &src, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	862	{
				863	dst = Sqrt(dot4(src, src));
				864	}
				865
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	866	void ShaderCore::dist1(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	867	{
				868	dst = Abs(src0.x - src1.x);
				869	}
				870
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	871	void ShaderCore::dist2(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	872	{
				873	Float4 dx = src0.x - src1.x;
				874	Float4 dy = src0.y - src1.y;
				875	Float4 dot2 = dx * dx + dy * dy;
				876	dst = Sqrt(dot2);
				877	}
				878
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	879	void ShaderCore::dist3(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	880	{
				881	Float4 dx = src0.x - src1.x;
				882	Float4 dy = src0.y - src1.y;
				883	Float4 dz = src0.z - src1.z;
				884	Float4 dot3 = dx * dx + dy * dy + dz * dz;
				885	dst = Sqrt(dot3);
				886	}
				887
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	888	void ShaderCore::dist4(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	889	{
				890	Float4 dx = src0.x - src1.x;
				891	Float4 dy = src0.y - src1.y;
				892	Float4 dz = src0.z - src1.z;
				893	Float4 dw = src0.w - src1.w;
				894	Float4 dot4 = dx * dx + dy * dy + dz * dz + dw * dw;
				895	dst = Sqrt(dot4);
				896	}
				897
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	898	void ShaderCore::dp1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	899	{
				900	Float4 t = src0.x * src1.x;
				901
				902	dst.x = t;
				903	dst.y = t;
				904	dst.z = t;
				905	dst.w = t;
				906	}
				907
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	908	void ShaderCore::dp2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	909	{
				910	Float4 t = dot2(src0, src1);
				911
				912	dst.x = t;
				913	dst.y = t;
				914	dst.z = t;
				915	dst.w = t;
				916	}
				917
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	918	void ShaderCore::dp2add(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	919	{
				920	Float4 t = dot2(src0, src1) + src2.x;
				921
				922	dst.x = t;
				923	dst.y = t;
				924	dst.z = t;
				925	dst.w = t;
				926	}
				927
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	928	void ShaderCore::dp3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	929	{
				930	Float4 dot = dot3(src0, src1);
				931
				932	dst.x = dot;
				933	dst.y = dot;
				934	dst.z = dot;
				935	dst.w = dot;
				936	}
				937
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	938	void ShaderCore::dp4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	939	{
				940	Float4 dot = dot4(src0, src1);
				941
				942	dst.x = dot;
				943	dst.y = dot;
				944	dst.z = dot;
				945	dst.w = dot;
				946	}
				947
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	948	void ShaderCore::min(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	949	{
				950	dst.x = Min(src0.x, src1.x);
				951	dst.y = Min(src0.y, src1.y);
				952	dst.z = Min(src0.z, src1.z);
				953	dst.w = Min(src0.w, src1.w);
				954	}
				955
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	956	void ShaderCore::imin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
				957	{
				958	dst.x = As<Float4>(Min(As<Int4>(src0.x), As<Int4>(src1.x)));
				959	dst.y = As<Float4>(Min(As<Int4>(src0.y), As<Int4>(src1.y)));
				960	dst.z = As<Float4>(Min(As<Int4>(src0.z), As<Int4>(src1.z)));
				961	dst.w = As<Float4>(Min(As<Int4>(src0.w), As<Int4>(src1.w)));
				962	}
				963
				964	void ShaderCore::umin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
				965	{
				966	dst.x = As<Float4>(Min(As<UInt4>(src0.x), As<UInt4>(src1.x)));
				967	dst.y = As<Float4>(Min(As<UInt4>(src0.y), As<UInt4>(src1.y)));
				968	dst.z = As<Float4>(Min(As<UInt4>(src0.z), As<UInt4>(src1.z)));
				969	dst.w = As<Float4>(Min(As<UInt4>(src0.w), As<UInt4>(src1.w)));
				970	}
				971
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	972	void ShaderCore::max(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	973	{
				974	dst.x = Max(src0.x, src1.x);
				975	dst.y = Max(src0.y, src1.y);
				976	dst.z = Max(src0.z, src1.z);
				977	dst.w = Max(src0.w, src1.w);
				978	}
				979
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	980	void ShaderCore::imax(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
				981	{
				982	dst.x = As<Float4>(Max(As<Int4>(src0.x), As<Int4>(src1.x)));
				983	dst.y = As<Float4>(Max(As<Int4>(src0.y), As<Int4>(src1.y)));
				984	dst.z = As<Float4>(Max(As<Int4>(src0.z), As<Int4>(src1.z)));
				985	dst.w = As<Float4>(Max(As<Int4>(src0.w), As<Int4>(src1.w)));
				986	}
				987
				988	void ShaderCore::umax(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
				989	{
				990	dst.x = As<Float4>(Max(As<Int4>(src0.x), As<Int4>(src1.x)));
				991	dst.y = As<Float4>(Max(As<Int4>(src0.y), As<Int4>(src1.y)));
				992	dst.z = As<Float4>(Max(As<Int4>(src0.z), As<Int4>(src1.z)));
				993	dst.w = As<Float4>(Max(As<Int4>(src0.w), As<Int4>(src1.w)));
				994	}
				995
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	996	void ShaderCore::slt(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	997	{
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	998	dst.x = As<Float4>(As<Int4>(CmpLT(src0.x, src1.x)) & As<Int4>(Float4(1.0f)));
				999	dst.y = As<Float4>(As<Int4>(CmpLT(src0.y, src1.y)) & As<Int4>(Float4(1.0f)));
				1000	dst.z = As<Float4>(As<Int4>(CmpLT(src0.z, src1.z)) & As<Int4>(Float4(1.0f)));
				1001	dst.w = As<Float4>(As<Int4>(CmpLT(src0.w, src1.w)) & As<Int4>(Float4(1.0f)));
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1002	}
				1003
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1004	void ShaderCore::step(Vector4f &dst, const Vector4f &edge, const Vector4f &x)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1005	{
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1006	dst.x = As<Float4>(CmpNLT(x.x, edge.x) & As<Int4>(Float4(1.0f)));
				1007	dst.y = As<Float4>(CmpNLT(x.y, edge.y) & As<Int4>(Float4(1.0f)));
				1008	dst.z = As<Float4>(CmpNLT(x.z, edge.z) & As<Int4>(Float4(1.0f)));
				1009	dst.w = As<Float4>(CmpNLT(x.w, edge.w) & As<Int4>(Float4(1.0f)));
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1010	}
				1011
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1012	void ShaderCore::exp2x(Vector4f &dst, const Vector4f &src, bool pp)
Nicolas Capens	0bac285	2016-05-07 06:09:58 -0400	[diff] [blame]	1013	{
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1014	Float4 exp = exponential2(src.x, pp);
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1015
				1016	dst.x = exp;
				1017	dst.y = exp;
				1018	dst.z = exp;
				1019	dst.w = exp;
				1020	}
				1021
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1022	void ShaderCore::exp2(Vector4f &dst, const Vector4f &src, bool pp)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1023	{
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1024	dst.x = exponential2(src.x, pp);
				1025	dst.y = exponential2(src.y, pp);
				1026	dst.z = exponential2(src.z, pp);
				1027	dst.w = exponential2(src.w, pp);
				1028	}
				1029
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1030	void ShaderCore::exp(Vector4f &dst, const Vector4f &src, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1031	{
				1032	dst.x = exponential(src.x, pp);
				1033	dst.y = exponential(src.y, pp);
				1034	dst.z = exponential(src.z, pp);
				1035	dst.w = exponential(src.w, pp);
				1036	}
				1037
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1038	void ShaderCore::log2x(Vector4f &dst, const Vector4f &src, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1039	{
				1040	Float4 log = logarithm2(src.x, true, pp);
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1041
				1042	dst.x = log;
				1043	dst.y = log;
				1044	dst.z = log;
				1045	dst.w = log;
				1046	}
				1047
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1048	void ShaderCore::log2(Vector4f &dst, const Vector4f &src, bool pp)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1049	{
Alexis Hetu	a781af7	2017-07-06 17:12:47 -0400	[diff] [blame]	1050	dst.x = logarithm2(src.x, false, pp);
				1051	dst.y = logarithm2(src.y, false, pp);
				1052	dst.z = logarithm2(src.z, false, pp);
				1053	dst.w = logarithm2(src.w, false, pp);
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1054	}
				1055
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1056	void ShaderCore::log(Vector4f &dst, const Vector4f &src, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1057	{
				1058	dst.x = logarithm(src.x, false, pp);
				1059	dst.y = logarithm(src.y, false, pp);
				1060	dst.z = logarithm(src.z, false, pp);
				1061	dst.w = logarithm(src.w, false, pp);
				1062	}
				1063
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1064	void ShaderCore::lit(Vector4f &dst, const Vector4f &src)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1065	{
				1066	dst.x = Float4(1.0f);
				1067	dst.y = Max(src.x, Float4(0.0f));
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1068
				1069	Float4 pow;
				1070
				1071	pow = src.w;
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1072	pow = Min(pow, Float4(127.9961f));
				1073	pow = Max(pow, Float4(-127.9961f));
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1074
				1075	dst.z = power(src.y, pow);
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1076	dst.z = As<Float4>(As<Int4>(dst.z) & CmpNLT(src.x, Float4(0.0f)));
				1077	dst.z = As<Float4>(As<Int4>(dst.z) & CmpNLT(src.y, Float4(0.0f)));
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1078
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1079	dst.w = Float4(1.0f);
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1080	}
				1081
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1082	void ShaderCore::att(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1083	{
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1084	// Computes attenuation factors (1, d, d^2, 1/d) assuming src0 = d^2, src1 = 1/d
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1085	dst.x = 1;
				1086	dst.y = src0.y * src1.y;
				1087	dst.z = src0.z;
				1088	dst.w = src1.w;
				1089	}
				1090
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1091	void ShaderCore::lrp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1092	{
				1093	dst.x = src0.x * (src1.x - src2.x) + src2.x;
				1094	dst.y = src0.y * (src1.y - src2.y) + src2.y;
				1095	dst.z = src0.z * (src1.z - src2.z) + src2.z;
				1096	dst.w = src0.w * (src1.w - src2.w) + src2.w;
				1097	}
				1098
Alexis Hetu	8ef6d10	2017-11-09 15:49:09 -0500	[diff] [blame]	1099	void ShaderCore::isinf(Vector4f &dst, const Vector4f &src)
				1100	{
				1101	dst.x = As<Float4>(IsInf(src.x));
				1102	dst.y = As<Float4>(IsInf(src.y));
				1103	dst.z = As<Float4>(IsInf(src.z));
				1104	dst.w = As<Float4>(IsInf(src.w));
				1105	}
				1106
				1107	void ShaderCore::isnan(Vector4f &dst, const Vector4f &src)
				1108	{
				1109	dst.x = As<Float4>(IsNan(src.x));
				1110	dst.y = As<Float4>(IsNan(src.y));
				1111	dst.z = As<Float4>(IsNan(src.z));
				1112	dst.w = As<Float4>(IsNan(src.w));
				1113	}
				1114
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1115	void ShaderCore::smooth(Vector4f &dst, const Vector4f &edge0, const Vector4f &edge1, const Vector4f &x)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1116	{
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1117	Float4 tx = Min(Max((x.x - edge0.x) / (edge1.x - edge0.x), Float4(0.0f)), Float4(1.0f)); dst.x = tx * tx * (Float4(3.0f) - Float4(2.0f) * tx);
				1118	Float4 ty = Min(Max((x.y - edge0.y) / (edge1.y - edge0.y), Float4(0.0f)), Float4(1.0f)); dst.y = ty * ty * (Float4(3.0f) - Float4(2.0f) * ty);
				1119	Float4 tz = Min(Max((x.z - edge0.z) / (edge1.z - edge0.z), Float4(0.0f)), Float4(1.0f)); dst.z = tz * tz * (Float4(3.0f) - Float4(2.0f) * tz);
				1120	Float4 tw = Min(Max((x.w - edge0.w) / (edge1.w - edge0.w), Float4(0.0f)), Float4(1.0f)); dst.w = tw * tw * (Float4(3.0f) - Float4(2.0f) * tw);
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1121	}
				1122
Alexis Hetu	ffb35eb	2016-04-06 18:05:00 -0400	[diff] [blame]	1123	void ShaderCore::floatToHalfBits(Float4& dst, const Float4& floatBits, bool storeInUpperBits)
				1124	{
				1125	static const uint32_t mask_sign = 0x80000000u;
				1126	static const uint32_t mask_round = ~0xfffu;
				1127	static const uint32_t c_f32infty = 255 << 23;
				1128	static const uint32_t c_magic = 15 << 23;
				1129	static const uint32_t c_nanbit = 0x200;
				1130	static const uint32_t c_infty_as_fp16 = 0x7c00;
				1131	static const uint32_t c_clamp = (31 << 23) - 0x1000;
Nicolas Capens	0bac285	2016-05-07 06:09:58 -0400	[diff] [blame]	1132
Alexis Hetu	ffb35eb	2016-04-06 18:05:00 -0400	[diff] [blame]	1133	UInt4 justsign = UInt4(mask_sign) & As<UInt4>(floatBits);
				1134	UInt4 absf = As<UInt4>(floatBits) ^ justsign;
				1135	UInt4 b_isnormal = CmpNLE(UInt4(c_f32infty), absf);
				1136
				1137	// Note: this version doesn't round to the nearest even in case of a tie as defined by IEEE 754-2008, it rounds to +inf
				1138	// instead of nearest even, since that's fine for GLSL ES 3.0's needs (see section 2.1.1 Floating-Point Computation)
				1139	UInt4 joined = ((((As<UInt4>(Min(As<Float4>(absf & UInt4(mask_round)) * As<Float4>(UInt4(c_magic)),
				1140	As<Float4>(UInt4(c_clamp))))) - UInt4(mask_round)) >> 13) & b_isnormal) \|
				1141	((b_isnormal ^ UInt4(0xFFFFFFFF)) & ((CmpNLE(absf, UInt4(c_f32infty)) & UInt4(c_nanbit)) \|
				1142	UInt4(c_infty_as_fp16)));
				1143
				1144	dst = As<Float4>(storeInUpperBits ? As<UInt4>(dst) \| ((joined << 16) \| justsign) : joined \| (justsign >> 16));
				1145	}
				1146
				1147	void ShaderCore::halfToFloatBits(Float4& dst, const Float4& halfBits)
				1148	{
				1149	static const uint32_t mask_nosign = 0x7FFF;
				1150	static const uint32_t magic = (254 - 15) << 23;
				1151	static const uint32_t was_infnan = 0x7BFF;
				1152	static const uint32_t exp_infnan = 255 << 23;
				1153
				1154	UInt4 expmant = As<UInt4>(halfBits) & UInt4(mask_nosign);
				1155	dst = As<Float4>(As<UInt4>(As<Float4>(expmant << 13) * As<Float4>(UInt4(magic))) \|
				1156	((As<UInt4>(halfBits) ^ UInt4(expmant)) << 16) \|
				1157	(CmpNLE(As<UInt4>(expmant), UInt4(was_infnan)) & UInt4(exp_infnan)));
				1158	}
				1159
				1160	void ShaderCore::packHalf2x16(Vector4f &d, const Vector4f &s0)
				1161	{
				1162	// half2 \| half1
				1163	floatToHalfBits(d.x, s0.x, false);
				1164	floatToHalfBits(d.x, s0.y, true);
				1165	}
				1166
				1167	void ShaderCore::unpackHalf2x16(Vector4f &dst, const Vector4f &s0)
				1168	{
				1169	// half2 \| half1
				1170	halfToFloatBits(dst.x, As<Float4>(As<UInt4>(s0.x) & UInt4(0x0000FFFF)));
				1171	halfToFloatBits(dst.y, As<Float4>((As<UInt4>(s0.x) & UInt4(0xFFFF0000)) >> 16));
				1172	}
				1173
Alexis Hetu	9cde974	2016-04-06 13:03:38 -0400	[diff] [blame]	1174	void ShaderCore::packSnorm2x16(Vector4f &d, const Vector4f &s0)
				1175	{
				1176	// round(clamp(c, -1.0, 1.0) * 32767.0)
				1177	d.x = As<Float4>((Int4(Round(Min(Max(s0.x, Float4(-1.0f)), Float4(1.0f)) * Float4(32767.0f))) & Int4(0xFFFF)) \|
				1178	((Int4(Round(Min(Max(s0.y, Float4(-1.0f)), Float4(1.0f)) * Float4(32767.0f))) & Int4(0xFFFF)) << 16));
				1179	}
				1180
				1181	void ShaderCore::packUnorm2x16(Vector4f &d, const Vector4f &s0)
				1182	{
				1183	// round(clamp(c, 0.0, 1.0) * 65535.0)
				1184	d.x = As<Float4>((Int4(Round(Min(Max(s0.x, Float4(0.0f)), Float4(1.0f)) * Float4(65535.0f))) & Int4(0xFFFF)) \|
				1185	((Int4(Round(Min(Max(s0.y, Float4(0.0f)), Float4(1.0f)) * Float4(65535.0f))) & Int4(0xFFFF)) << 16));
				1186	}
				1187
				1188	void ShaderCore::unpackSnorm2x16(Vector4f &dst, const Vector4f &s0)
				1189	{
				1190	// clamp(f / 32727.0, -1.0, 1.0)
				1191	dst.x = Min(Max(Float4(As<Int4>((As<UInt4>(s0.x) & UInt4(0x0000FFFF)) << 16)) * Float4(1.0f / float(0x7FFF0000)), Float4(-1.0f)), Float4(1.0f));
				1192	dst.y = Min(Max(Float4(As<Int4>(As<UInt4>(s0.x) & UInt4(0xFFFF0000))) * Float4(1.0f / float(0x7FFF0000)), Float4(-1.0f)), Float4(1.0f));
				1193	}
				1194
				1195	void ShaderCore::unpackUnorm2x16(Vector4f &dst, const Vector4f &s0)
				1196	{
				1197	// f / 65535.0
				1198	dst.x = Float4((As<UInt4>(s0.x) & UInt4(0x0000FFFF)) << 16) * Float4(1.0f / float(0xFFFF0000));
				1199	dst.y = Float4(As<UInt4>(s0.x) & UInt4(0xFFFF0000)) * Float4(1.0f / float(0xFFFF0000));
				1200	}
				1201
Alexis Hetu	c3d95f3	2015-09-23 12:27:32 -0400	[diff] [blame]	1202	void ShaderCore::det2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
				1203	{
				1204	dst.x = src0.x * src1.y - src0.y * src1.x;
				1205	dst.y = dst.z = dst.w = dst.x;
				1206	}
				1207
				1208	void ShaderCore::det3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2)
				1209	{
				1210	crs(dst, src1, src2);
				1211	dp3(dst, dst, src0);
				1212	}
				1213
				1214	void ShaderCore::det4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2, const Vector4f &src3)
				1215	{
				1216	dst.x = src2.z * src3.w - src2.w * src3.z;
				1217	dst.y = src1.w * src3.z - src1.z * src3.w;
				1218	dst.z = src1.z * src2.w - src1.w * src2.z;
				1219	dst.x = src0.x * (src1.y * dst.x + src2.y * dst.y + src3.y * dst.z) -
				1220	src0.y * (src1.x * dst.x + src2.x * dst.y + src3.x * dst.z) +
				1221	src0.z * (src1.x * (src2.y * src3.w - src2.w * src3.y) +
				1222	src2.x * (src1.w * src3.y - src1.y * src3.w) +
				1223	src3.x * (src1.y * src2.w - src1.w * src2.y)) +
				1224	src0.w * (src1.x * (src2.z * src3.y - src2.y * src3.z) +
				1225	src2.x * (src1.y * src3.z - src1.z * src3.y) +
				1226	src3.x * (src1.z * src2.y - src1.y * src2.z));
				1227	dst.y = dst.z = dst.w = dst.x;
				1228	}
				1229
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1230	void ShaderCore::frc(Vector4f &dst, const Vector4f &src)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1231	{
				1232	dst.x = Frac(src.x);
				1233	dst.y = Frac(src.y);
				1234	dst.z = Frac(src.z);
				1235	dst.w = Frac(src.w);
				1236	}
				1237
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1238	void ShaderCore::trunc(Vector4f &dst, const Vector4f &src)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1239	{
				1240	dst.x = Trunc(src.x);
				1241	dst.y = Trunc(src.y);
				1242	dst.z = Trunc(src.z);
				1243	dst.w = Trunc(src.w);
				1244	}
				1245
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1246	void ShaderCore::floor(Vector4f &dst, const Vector4f &src)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1247	{
				1248	dst.x = Floor(src.x);
				1249	dst.y = Floor(src.y);
				1250	dst.z = Floor(src.z);
				1251	dst.w = Floor(src.w);
				1252	}
				1253
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1254	void ShaderCore::round(Vector4f &dst, const Vector4f &src)
Alexis Hetu	8b5f3ef	2015-04-16 11:33:34 -0400	[diff] [blame]	1255	{
				1256	dst.x = Round(src.x);
				1257	dst.y = Round(src.y);
				1258	dst.z = Round(src.z);
				1259	dst.w = Round(src.w);
				1260	}
				1261
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1262	void ShaderCore::roundEven(Vector4f &dst, const Vector4f &src)
Alexis Hetu	8e851c1	2015-06-04 11:30:54 -0400	[diff] [blame]	1263	{
				1264	// dst = round(src) + ((round(src) < src) * 2 - 1) * (fract(src) == 0.5) * isOdd(round(src));
				1265	// ex.: 1.5: 2 + (0 * 2 - 1) * 1 * 0 = 2
				1266	// 2.5: 3 + (0 * 2 - 1) * 1 * 1 = 2
				1267	// -1.5: -2 + (1 * 2 - 1) * 1 * 0 = -2
				1268	// -2.5: -3 + (1 * 2 - 1) * 1 * 1 = -2
				1269	// Even if the round implementation rounds the other way:
				1270	// 1.5: 1 + (1 * 2 - 1) * 1 * 1 = 2
				1271	// 2.5: 2 + (1 * 2 - 1) * 1 * 0 = 2
				1272	// -1.5: -1 + (0 * 2 - 1) * 1 * 1 = -2
				1273	// -2.5: -2 + (0 * 2 - 1) * 1 * 0 = -2
				1274	round(dst, src);
				1275	dst.x += ((Float4(CmpLT(dst.x, src.x) & Int4(1)) * Float4(2.0f)) - Float4(1.0f)) * Float4(CmpEQ(Frac(src.x), Float4(0.5f)) & Int4(1)) * Float4(Int4(dst.x) & Int4(1));
				1276	dst.y += ((Float4(CmpLT(dst.y, src.y) & Int4(1)) * Float4(2.0f)) - Float4(1.0f)) * Float4(CmpEQ(Frac(src.y), Float4(0.5f)) & Int4(1)) * Float4(Int4(dst.y) & Int4(1));
				1277	dst.z += ((Float4(CmpLT(dst.z, src.z) & Int4(1)) * Float4(2.0f)) - Float4(1.0f)) * Float4(CmpEQ(Frac(src.z), Float4(0.5f)) & Int4(1)) * Float4(Int4(dst.z) & Int4(1));
				1278	dst.w += ((Float4(CmpLT(dst.w, src.w) & Int4(1)) * Float4(2.0f)) - Float4(1.0f)) * Float4(CmpEQ(Frac(src.w), Float4(0.5f)) & Int4(1)) * Float4(Int4(dst.w) & Int4(1));
				1279	}
				1280
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1281	void ShaderCore::ceil(Vector4f &dst, const Vector4f &src)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1282	{
				1283	dst.x = Ceil(src.x);
				1284	dst.y = Ceil(src.y);
				1285	dst.z = Ceil(src.z);
				1286	dst.w = Ceil(src.w);
				1287	}
				1288
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1289	void ShaderCore::powx(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1290	{
				1291	Float4 pow = power(src0.x, src1.x, pp);
				1292
				1293	dst.x = pow;
				1294	dst.y = pow;
				1295	dst.z = pow;
				1296	dst.w = pow;
				1297	}
				1298
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1299	void ShaderCore::pow(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1300	{
				1301	dst.x = power(src0.x, src1.x, pp);
				1302	dst.y = power(src0.y, src1.y, pp);
				1303	dst.z = power(src0.z, src1.z, pp);
				1304	dst.w = power(src0.w, src1.w, pp);
				1305	}
				1306
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1307	void ShaderCore::crs(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1308	{
				1309	dst.x = src0.y * src1.z - src0.z * src1.y;
				1310	dst.y = src0.z * src1.x - src0.x * src1.z;
				1311	dst.z = src0.x * src1.y - src0.y * src1.x;
				1312	}
				1313
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1314	void ShaderCore::forward1(Vector4f &dst, const Vector4f &N, const Vector4f &I, const Vector4f &Nref)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1315	{
				1316	Int4 flip = CmpNLT(Nref.x * I.x, Float4(0.0f)) & Int4(0x80000000);
				1317
				1318	dst.x = As<Float4>(flip ^ As<Int4>(N.x));
				1319	}
				1320
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1321	void ShaderCore::forward2(Vector4f &dst, const Vector4f &N, const Vector4f &I, const Vector4f &Nref)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1322	{
				1323	Int4 flip = CmpNLT(dot2(Nref, I), Float4(0.0f)) & Int4(0x80000000);
				1324
				1325	dst.x = As<Float4>(flip ^ As<Int4>(N.x));
				1326	dst.y = As<Float4>(flip ^ As<Int4>(N.y));
				1327	}
				1328
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1329	void ShaderCore::forward3(Vector4f &dst, const Vector4f &N, const Vector4f &I, const Vector4f &Nref)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1330	{
				1331	Int4 flip = CmpNLT(dot3(Nref, I), Float4(0.0f)) & Int4(0x80000000);
				1332
				1333	dst.x = As<Float4>(flip ^ As<Int4>(N.x));
				1334	dst.y = As<Float4>(flip ^ As<Int4>(N.y));
				1335	dst.z = As<Float4>(flip ^ As<Int4>(N.z));
				1336	}
				1337
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1338	void ShaderCore::forward4(Vector4f &dst, const Vector4f &N, const Vector4f &I, const Vector4f &Nref)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1339	{
				1340	Int4 flip = CmpNLT(dot4(Nref, I), Float4(0.0f)) & Int4(0x80000000);
				1341
				1342	dst.x = As<Float4>(flip ^ As<Int4>(N.x));
				1343	dst.y = As<Float4>(flip ^ As<Int4>(N.y));
				1344	dst.z = As<Float4>(flip ^ As<Int4>(N.z));
				1345	dst.w = As<Float4>(flip ^ As<Int4>(N.w));
				1346	}
Nicolas Capens	0bac285	2016-05-07 06:09:58 -0400	[diff] [blame]	1347
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1348	void ShaderCore::reflect1(Vector4f &dst, const Vector4f &I, const Vector4f &N)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1349	{
				1350	Float4 d = N.x * I.x;
				1351
				1352	dst.x = I.x - Float4(2.0f) * d * N.x;
				1353	}
				1354
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1355	void ShaderCore::reflect2(Vector4f &dst, const Vector4f &I, const Vector4f &N)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1356	{
				1357	Float4 d = dot2(N, I);
				1358
				1359	dst.x = I.x - Float4(2.0f) * d * N.x;
				1360	dst.y = I.y - Float4(2.0f) * d * N.y;
				1361	}
				1362
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1363	void ShaderCore::reflect3(Vector4f &dst, const Vector4f &I, const Vector4f &N)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1364	{
				1365	Float4 d = dot3(N, I);
				1366
				1367	dst.x = I.x - Float4(2.0f) * d * N.x;
				1368	dst.y = I.y - Float4(2.0f) * d * N.y;
				1369	dst.z = I.z - Float4(2.0f) * d * N.z;
				1370	}
				1371
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1372	void ShaderCore::reflect4(Vector4f &dst, const Vector4f &I, const Vector4f &N)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1373	{
				1374	Float4 d = dot4(N, I);
				1375
				1376	dst.x = I.x - Float4(2.0f) * d * N.x;
				1377	dst.y = I.y - Float4(2.0f) * d * N.y;
				1378	dst.z = I.z - Float4(2.0f) * d * N.z;
				1379	dst.w = I.w - Float4(2.0f) * d * N.w;
				1380	}
				1381
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1382	void ShaderCore::refract1(Vector4f &dst, const Vector4f &I, const Vector4f &N, const Float4 &eta)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1383	{
				1384	Float4 d = N.x * I.x;
				1385	Float4 k = Float4(1.0f) - eta * eta * (Float4(1.0f) - d * d);
				1386	Int4 pos = CmpNLT(k, Float4(0.0f));
				1387	Float4 t = (eta * d + Sqrt(k));
				1388
				1389	dst.x = As<Float4>(pos & As<Int4>(eta * I.x - t * N.x));
				1390	}
				1391
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1392	void ShaderCore::refract2(Vector4f &dst, const Vector4f &I, const Vector4f &N, const Float4 &eta)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1393	{
				1394	Float4 d = dot2(N, I);
				1395	Float4 k = Float4(1.0f) - eta * eta * (Float4(1.0f) - d * d);
				1396	Int4 pos = CmpNLT(k, Float4(0.0f));
				1397	Float4 t = (eta * d + Sqrt(k));
				1398
				1399	dst.x = As<Float4>(pos & As<Int4>(eta * I.x - t * N.x));
				1400	dst.y = As<Float4>(pos & As<Int4>(eta * I.y - t * N.y));
				1401	}
				1402
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1403	void ShaderCore::refract3(Vector4f &dst, const Vector4f &I, const Vector4f &N, const Float4 &eta)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1404	{
				1405	Float4 d = dot3(N, I);
				1406	Float4 k = Float4(1.0f) - eta * eta * (Float4(1.0f) - d * d);
				1407	Int4 pos = CmpNLT(k, Float4(0.0f));
				1408	Float4 t = (eta * d + Sqrt(k));
				1409
				1410	dst.x = As<Float4>(pos & As<Int4>(eta * I.x - t * N.x));
				1411	dst.y = As<Float4>(pos & As<Int4>(eta * I.y - t * N.y));
				1412	dst.z = As<Float4>(pos & As<Int4>(eta * I.z - t * N.z));
				1413	}
				1414
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1415	void ShaderCore::refract4(Vector4f &dst, const Vector4f &I, const Vector4f &N, const Float4 &eta)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1416	{
				1417	Float4 d = dot4(N, I);
				1418	Float4 k = Float4(1.0f) - eta * eta * (Float4(1.0f) - d * d);
				1419	Int4 pos = CmpNLT(k, Float4(0.0f));
				1420	Float4 t = (eta * d + Sqrt(k));
				1421
				1422	dst.x = As<Float4>(pos & As<Int4>(eta * I.x - t * N.x));
				1423	dst.y = As<Float4>(pos & As<Int4>(eta * I.y - t * N.y));
				1424	dst.z = As<Float4>(pos & As<Int4>(eta * I.z - t * N.z));
				1425	dst.w = As<Float4>(pos & As<Int4>(eta * I.w - t * N.w));
				1426	}
				1427
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1428	void ShaderCore::sgn(Vector4f &dst, const Vector4f &src)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1429	{
				1430	sgn(dst.x, src.x);
				1431	sgn(dst.y, src.y);
				1432	sgn(dst.z, src.z);
				1433	sgn(dst.w, src.w);
				1434	}
				1435
Alexis Hetu	0f44807	2016-03-18 10:56:08 -0400	[diff] [blame]	1436	void ShaderCore::isgn(Vector4f &dst, const Vector4f &src)
				1437	{
				1438	isgn(dst.x, src.x);
				1439	isgn(dst.y, src.y);
				1440	isgn(dst.z, src.z);
				1441	isgn(dst.w, src.w);
				1442	}
				1443
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1444	void ShaderCore::abs(Vector4f &dst, const Vector4f &src)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1445	{
				1446	dst.x = Abs(src.x);
				1447	dst.y = Abs(src.y);
				1448	dst.z = Abs(src.z);
				1449	dst.w = Abs(src.w);
				1450	}
Alexis Hetu	0f44807	2016-03-18 10:56:08 -0400	[diff] [blame]	1451
				1452	void ShaderCore::iabs(Vector4f &dst, const Vector4f &src)
				1453	{
				1454	dst.x = As<Float4>(Abs(As<Int4>(src.x)));
				1455	dst.y = As<Float4>(Abs(As<Int4>(src.y)));
				1456	dst.z = As<Float4>(Abs(As<Int4>(src.z)));
				1457	dst.w = As<Float4>(Abs(As<Int4>(src.w)));
				1458	}
				1459
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1460	void ShaderCore::nrm2(Vector4f &dst, const Vector4f &src, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1461	{
				1462	Float4 dot = dot2(src, src);
				1463	Float4 rsq = reciprocalSquareRoot(dot, false, pp);
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1464
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1465	dst.x = src.x * rsq;
				1466	dst.y = src.y * rsq;
				1467	dst.z = src.z * rsq;
				1468	dst.w = src.w * rsq;
				1469	}
				1470
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1471	void ShaderCore::nrm3(Vector4f &dst, const Vector4f &src, bool pp)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1472	{
				1473	Float4 dot = dot3(src, src);
				1474	Float4 rsq = reciprocalSquareRoot(dot, false, pp);
				1475
				1476	dst.x = src.x * rsq;
				1477	dst.y = src.y * rsq;
				1478	dst.z = src.z * rsq;
				1479	dst.w = src.w * rsq;
				1480	}
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1481
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1482	void ShaderCore::nrm4(Vector4f &dst, const Vector4f &src, bool pp)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1483	{
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1484	Float4 dot = dot4(src, src);
				1485	Float4 rsq = reciprocalSquareRoot(dot, false, pp);
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1486
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1487	dst.x = src.x * rsq;
				1488	dst.y = src.y * rsq;
				1489	dst.z = src.z * rsq;
				1490	dst.w = src.w * rsq;
				1491	}
Nicolas Capens	0bac285	2016-05-07 06:09:58 -0400	[diff] [blame]	1492
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1493	void ShaderCore::sincos(Vector4f &dst, const Vector4f &src, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1494	{
				1495	dst.x = cosine_pi(src.x, pp);
				1496	dst.y = sine_pi(src.x, pp);
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1497	}
				1498
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1499	void ShaderCore::cos(Vector4f &dst, const Vector4f &src, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1500	{
				1501	dst.x = cosine(src.x, pp);
				1502	dst.y = cosine(src.y, pp);
				1503	dst.z = cosine(src.z, pp);
				1504	dst.w = cosine(src.w, pp);
				1505	}
				1506
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1507	void ShaderCore::sin(Vector4f &dst, const Vector4f &src, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1508	{
				1509	dst.x = sine(src.x, pp);
				1510	dst.y = sine(src.y, pp);
				1511	dst.z = sine(src.z, pp);
				1512	dst.w = sine(src.w, pp);
				1513	}
				1514
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1515	void ShaderCore::tan(Vector4f &dst, const Vector4f &src, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1516	{
				1517	dst.x = tangent(src.x, pp);
				1518	dst.y = tangent(src.y, pp);
				1519	dst.z = tangent(src.z, pp);
				1520	dst.w = tangent(src.w, pp);
				1521	}
				1522
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1523	void ShaderCore::acos(Vector4f &dst, const Vector4f &src, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1524	{
				1525	dst.x = arccos(src.x, pp);
				1526	dst.y = arccos(src.y, pp);
				1527	dst.z = arccos(src.z, pp);
				1528	dst.w = arccos(src.w, pp);
				1529	}
				1530
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1531	void ShaderCore::asin(Vector4f &dst, const Vector4f &src, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1532	{
				1533	dst.x = arcsin(src.x, pp);
				1534	dst.y = arcsin(src.y, pp);
				1535	dst.z = arcsin(src.z, pp);
				1536	dst.w = arcsin(src.w, pp);
				1537	}
				1538
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1539	void ShaderCore::atan(Vector4f &dst, const Vector4f &src, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1540	{
				1541	dst.x = arctan(src.x, pp);
				1542	dst.y = arctan(src.y, pp);
				1543	dst.z = arctan(src.z, pp);
				1544	dst.w = arctan(src.w, pp);
				1545	}
				1546
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1547	void ShaderCore::atan2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1548	{
				1549	dst.x = arctan(src0.x, src1.x, pp);
				1550	dst.y = arctan(src0.y, src1.y, pp);
				1551	dst.z = arctan(src0.z, src1.z, pp);
				1552	dst.w = arctan(src0.w, src1.w, pp);
				1553	}
				1554
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1555	void ShaderCore::cosh(Vector4f &dst, const Vector4f &src, bool pp)
Alexis Hetu	8b5f3ef	2015-04-16 11:33:34 -0400	[diff] [blame]	1556	{
				1557	dst.x = cosineh(src.x, pp);
				1558	dst.y = cosineh(src.y, pp);
				1559	dst.z = cosineh(src.z, pp);
				1560	dst.w = cosineh(src.w, pp);
				1561	}
				1562
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1563	void ShaderCore::sinh(Vector4f &dst, const Vector4f &src, bool pp)
Alexis Hetu	8b5f3ef	2015-04-16 11:33:34 -0400	[diff] [blame]	1564	{
				1565	dst.x = sineh(src.x, pp);
				1566	dst.y = sineh(src.y, pp);
				1567	dst.z = sineh(src.z, pp);
				1568	dst.w = sineh(src.w, pp);
				1569	}
				1570
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1571	void ShaderCore::tanh(Vector4f &dst, const Vector4f &src, bool pp)
Alexis Hetu	8b5f3ef	2015-04-16 11:33:34 -0400	[diff] [blame]	1572	{
				1573	dst.x = tangenth(src.x, pp);
				1574	dst.y = tangenth(src.y, pp);
				1575	dst.z = tangenth(src.z, pp);
				1576	dst.w = tangenth(src.w, pp);
				1577	}
				1578
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1579	void ShaderCore::acosh(Vector4f &dst, const Vector4f &src, bool pp)
Alexis Hetu	8b5f3ef	2015-04-16 11:33:34 -0400	[diff] [blame]	1580	{
				1581	dst.x = arccosh(src.x, pp);
				1582	dst.y = arccosh(src.y, pp);
				1583	dst.z = arccosh(src.z, pp);
				1584	dst.w = arccosh(src.w, pp);
				1585	}
				1586
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1587	void ShaderCore::asinh(Vector4f &dst, const Vector4f &src, bool pp)
Alexis Hetu	8b5f3ef	2015-04-16 11:33:34 -0400	[diff] [blame]	1588	{
				1589	dst.x = arcsinh(src.x, pp);
				1590	dst.y = arcsinh(src.y, pp);
				1591	dst.z = arcsinh(src.z, pp);
				1592	dst.w = arcsinh(src.w, pp);
				1593	}
				1594
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1595	void ShaderCore::atanh(Vector4f &dst, const Vector4f &src, bool pp)
Alexis Hetu	8b5f3ef	2015-04-16 11:33:34 -0400	[diff] [blame]	1596	{
				1597	dst.x = arctanh(src.x, pp);
				1598	dst.y = arctanh(src.y, pp);
				1599	dst.z = arctanh(src.z, pp);
				1600	dst.w = arctanh(src.w, pp);
				1601	}
				1602
Alexis Hetu	53ad4af	2017-12-06 14:49:07 -0500	[diff] [blame]	1603	void ShaderCore::expp(Vector4f &dst, const Vector4f &src, unsigned short shaderModel)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1604	{
Alexis Hetu	53ad4af	2017-12-06 14:49:07 -0500	[diff] [blame]	1605	if(shaderModel < 0x0200)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1606	{
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1607	Float4 frc = Frac(src.x);
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1608	Float4 floor = src.x - frc;
				1609
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1610	dst.x = exponential2(floor, true);
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1611	dst.y = frc;
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1612	dst.z = exponential2(src.x, true);
				1613	dst.w = Float4(1.0f);
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1614	}
				1615	else // Version >= 2.0
				1616	{
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1617	exp2x(dst, src, true); // FIXME: 10-bit precision suffices
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1618	}
				1619	}
Nicolas Capens	0bac285	2016-05-07 06:09:58 -0400	[diff] [blame]	1620
Alexis Hetu	53ad4af	2017-12-06 14:49:07 -0500	[diff] [blame]	1621	void ShaderCore::logp(Vector4f &dst, const Vector4f &src, unsigned short shaderModel)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1622	{
Alexis Hetu	53ad4af	2017-12-06 14:49:07 -0500	[diff] [blame]	1623	if(shaderModel < 0x0200)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1624	{
				1625	Float4 tmp0;
				1626	Float4 tmp1;
				1627	Float4 t;
				1628	Int4 r;
				1629
				1630	tmp0 = Abs(src.x);
				1631	tmp1 = tmp0;
				1632
				1633	// X component
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1634	r = As<Int4>(As<UInt4>(tmp0) >> 23) - Int4(127);
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1635	dst.x = Float4(r);
				1636
				1637	// Y component
				1638	dst.y = As<Float4>((As<Int4>(tmp1) & Int4(0x007FFFFF)) \| As<Int4>(Float4(1.0f)));
				1639
				1640	// Z component
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1641	dst.z = logarithm2(src.x, true, true);
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1642
				1643	// W component
				1644	dst.w = 1.0f;
				1645	}
				1646	else
				1647	{
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1648	log2x(dst, src, true);
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1649	}
				1650	}
Nicolas Capens	0bac285	2016-05-07 06:09:58 -0400	[diff] [blame]	1651
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1652	void ShaderCore::cmp0(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1653	{
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1654	cmp0(dst.x, src0.x, src1.x, src2.x);
				1655	cmp0(dst.y, src0.y, src1.y, src2.y);
				1656	cmp0(dst.z, src0.z, src1.z, src2.z);
				1657	cmp0(dst.w, src0.w, src1.w, src2.w);
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1658	}
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1659
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1660	void ShaderCore::select(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1661	{
				1662	select(dst.x, As<Int4>(src0.x), src1.x, src2.x);
				1663	select(dst.y, As<Int4>(src0.y), src1.y, src2.y);
				1664	select(dst.z, As<Int4>(src0.z), src1.z, src2.z);
				1665	select(dst.w, As<Int4>(src0.w), src1.w, src2.w);
				1666	}
				1667
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1668	void ShaderCore::extract(Float4 &dst, const Vector4f &src0, const Float4 &src1)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1669	{
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	1670	select(dst, CmpEQ(As<Int4>(src1), Int4(1)), src0.y, src0.x);
				1671	select(dst, CmpEQ(As<Int4>(src1), Int4(2)), src0.z, dst);
				1672	select(dst, CmpEQ(As<Int4>(src1), Int4(3)), src0.w, dst);
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1673	}
				1674
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1675	void ShaderCore::insert(Vector4f &dst, const Vector4f &src, const Float4 &element, const Float4 &index)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1676	{
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	1677	select(dst.x, CmpEQ(As<Int4>(index), Int4(0)), element, src.x);
				1678	select(dst.y, CmpEQ(As<Int4>(index), Int4(1)), element, src.y);
				1679	select(dst.z, CmpEQ(As<Int4>(index), Int4(2)), element, src.z);
				1680	select(dst.w, CmpEQ(As<Int4>(index), Int4(3)), element, src.w);
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1681	}
				1682
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1683	void ShaderCore::sgn(Float4 &dst, const Float4 &src)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1684	{
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1685	Int4 neg = As<Int4>(CmpLT(src, Float4(-0.0f))) & As<Int4>(Float4(-1.0f));
				1686	Int4 pos = As<Int4>(CmpNLE(src, Float4(+0.0f))) & As<Int4>(Float4(1.0f));
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1687	dst = As<Float4>(neg \| pos);
				1688	}
				1689
Alexis Hetu	0f44807	2016-03-18 10:56:08 -0400	[diff] [blame]	1690	void ShaderCore::isgn(Float4 &dst, const Float4 &src)
				1691	{
				1692	Int4 neg = CmpLT(As<Int4>(src), Int4(0)) & Int4(-1);
				1693	Int4 pos = CmpNLE(As<Int4>(src), Int4(0)) & Int4(1);
				1694	dst = As<Float4>(neg \| pos);
				1695	}
				1696
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1697	void ShaderCore::cmp0(Float4 &dst, const Float4 &src0, const Float4 &src1, const Float4 &src2)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1698	{
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1699	Int4 pos = CmpLE(Float4(0.0f), src0);
				1700	select(dst, pos, src1, src2);
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1701	}
				1702
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	1703	void ShaderCore::cmp0i(Float4 &dst, const Float4 &src0, const Float4 &src1, const Float4 &src2)
				1704	{
				1705	Int4 pos = CmpEQ(Int4(0), As<Int4>(src0));
				1706	select(dst, pos, src1, src2);
				1707	}
				1708
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1709	void ShaderCore::select(Float4 &dst, RValue<Int4> src0, const Float4 &src1, const Float4 &src2)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1710	{
				1711	// FIXME: LLVM vector select
Tom Anderson	69bc6e8	2017-03-20 11:54:29 -0700	[diff] [blame]	1712	dst = As<Float4>((src0 & As<Int4>(src1)) \| (~src0 & As<Int4>(src2)));
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1713	}
				1714
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1715	void ShaderCore::cmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1716	{
				1717	switch(control)
				1718	{
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1719	case Shader::CONTROL_GT:
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1720	dst.x = As<Float4>(CmpNLE(src0.x, src1.x));
				1721	dst.y = As<Float4>(CmpNLE(src0.y, src1.y));
				1722	dst.z = As<Float4>(CmpNLE(src0.z, src1.z));
				1723	dst.w = As<Float4>(CmpNLE(src0.w, src1.w));
				1724	break;
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1725	case Shader::CONTROL_EQ:
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1726	dst.x = As<Float4>(CmpEQ(src0.x, src1.x));
				1727	dst.y = As<Float4>(CmpEQ(src0.y, src1.y));
				1728	dst.z = As<Float4>(CmpEQ(src0.z, src1.z));
				1729	dst.w = As<Float4>(CmpEQ(src0.w, src1.w));
				1730	break;
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1731	case Shader::CONTROL_GE:
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1732	dst.x = As<Float4>(CmpNLT(src0.x, src1.x));
				1733	dst.y = As<Float4>(CmpNLT(src0.y, src1.y));
				1734	dst.z = As<Float4>(CmpNLT(src0.z, src1.z));
				1735	dst.w = As<Float4>(CmpNLT(src0.w, src1.w));
				1736	break;
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1737	case Shader::CONTROL_LT:
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1738	dst.x = As<Float4>(CmpLT(src0.x, src1.x));
				1739	dst.y = As<Float4>(CmpLT(src0.y, src1.y));
				1740	dst.z = As<Float4>(CmpLT(src0.z, src1.z));
				1741	dst.w = As<Float4>(CmpLT(src0.w, src1.w));
				1742	break;
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1743	case Shader::CONTROL_NE:
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1744	dst.x = As<Float4>(CmpNEQ(src0.x, src1.x));
				1745	dst.y = As<Float4>(CmpNEQ(src0.y, src1.y));
				1746	dst.z = As<Float4>(CmpNEQ(src0.z, src1.z));
				1747	dst.w = As<Float4>(CmpNEQ(src0.w, src1.w));
				1748	break;
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1749	case Shader::CONTROL_LE:
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1750	dst.x = As<Float4>(CmpLE(src0.x, src1.x));
				1751	dst.y = As<Float4>(CmpLE(src0.y, src1.y));
				1752	dst.z = As<Float4>(CmpLE(src0.z, src1.z));
				1753	dst.w = As<Float4>(CmpLE(src0.w, src1.w));
				1754	break;
				1755	default:
				1756	ASSERT(false);
				1757	}
				1758	}
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1759
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1760	void ShaderCore::icmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1761	{
				1762	switch(control)
				1763	{
				1764	case Shader::CONTROL_GT:
				1765	dst.x = As<Float4>(CmpNLE(As<Int4>(src0.x), As<Int4>(src1.x)));
				1766	dst.y = As<Float4>(CmpNLE(As<Int4>(src0.y), As<Int4>(src1.y)));
				1767	dst.z = As<Float4>(CmpNLE(As<Int4>(src0.z), As<Int4>(src1.z)));
				1768	dst.w = As<Float4>(CmpNLE(As<Int4>(src0.w), As<Int4>(src1.w)));
				1769	break;
				1770	case Shader::CONTROL_EQ:
				1771	dst.x = As<Float4>(CmpEQ(As<Int4>(src0.x), As<Int4>(src1.x)));
				1772	dst.y = As<Float4>(CmpEQ(As<Int4>(src0.y), As<Int4>(src1.y)));
				1773	dst.z = As<Float4>(CmpEQ(As<Int4>(src0.z), As<Int4>(src1.z)));
				1774	dst.w = As<Float4>(CmpEQ(As<Int4>(src0.w), As<Int4>(src1.w)));
				1775	break;
				1776	case Shader::CONTROL_GE:
				1777	dst.x = As<Float4>(CmpNLT(As<Int4>(src0.x), As<Int4>(src1.x)));
				1778	dst.y = As<Float4>(CmpNLT(As<Int4>(src0.y), As<Int4>(src1.y)));
				1779	dst.z = As<Float4>(CmpNLT(As<Int4>(src0.z), As<Int4>(src1.z)));
				1780	dst.w = As<Float4>(CmpNLT(As<Int4>(src0.w), As<Int4>(src1.w)));
				1781	break;
				1782	case Shader::CONTROL_LT:
				1783	dst.x = As<Float4>(CmpLT(As<Int4>(src0.x), As<Int4>(src1.x)));
				1784	dst.y = As<Float4>(CmpLT(As<Int4>(src0.y), As<Int4>(src1.y)));
				1785	dst.z = As<Float4>(CmpLT(As<Int4>(src0.z), As<Int4>(src1.z)));
				1786	dst.w = As<Float4>(CmpLT(As<Int4>(src0.w), As<Int4>(src1.w)));
				1787	break;
				1788	case Shader::CONTROL_NE:
				1789	dst.x = As<Float4>(CmpNEQ(As<Int4>(src0.x), As<Int4>(src1.x)));
				1790	dst.y = As<Float4>(CmpNEQ(As<Int4>(src0.y), As<Int4>(src1.y)));
				1791	dst.z = As<Float4>(CmpNEQ(As<Int4>(src0.z), As<Int4>(src1.z)));
				1792	dst.w = As<Float4>(CmpNEQ(As<Int4>(src0.w), As<Int4>(src1.w)));
				1793	break;
				1794	case Shader::CONTROL_LE:
				1795	dst.x = As<Float4>(CmpLE(As<Int4>(src0.x), As<Int4>(src1.x)));
				1796	dst.y = As<Float4>(CmpLE(As<Int4>(src0.y), As<Int4>(src1.y)));
				1797	dst.z = As<Float4>(CmpLE(As<Int4>(src0.z), As<Int4>(src1.z)));
				1798	dst.w = As<Float4>(CmpLE(As<Int4>(src0.w), As<Int4>(src1.w)));
				1799	break;
				1800	default:
				1801	ASSERT(false);
				1802	}
				1803	}
				1804
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	1805	void ShaderCore::ucmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control)
				1806	{
				1807	switch(control)
				1808	{
				1809	case Shader::CONTROL_GT:
				1810	dst.x = As<Float4>(CmpNLE(As<UInt4>(src0.x), As<UInt4>(src1.x)));
				1811	dst.y = As<Float4>(CmpNLE(As<UInt4>(src0.y), As<UInt4>(src1.y)));
				1812	dst.z = As<Float4>(CmpNLE(As<UInt4>(src0.z), As<UInt4>(src1.z)));
				1813	dst.w = As<Float4>(CmpNLE(As<UInt4>(src0.w), As<UInt4>(src1.w)));
				1814	break;
				1815	case Shader::CONTROL_EQ:
				1816	dst.x = As<Float4>(CmpEQ(As<UInt4>(src0.x), As<UInt4>(src1.x)));
				1817	dst.y = As<Float4>(CmpEQ(As<UInt4>(src0.y), As<UInt4>(src1.y)));
				1818	dst.z = As<Float4>(CmpEQ(As<UInt4>(src0.z), As<UInt4>(src1.z)));
				1819	dst.w = As<Float4>(CmpEQ(As<UInt4>(src0.w), As<UInt4>(src1.w)));
				1820	break;
				1821	case Shader::CONTROL_GE:
				1822	dst.x = As<Float4>(CmpNLT(As<UInt4>(src0.x), As<UInt4>(src1.x)));
				1823	dst.y = As<Float4>(CmpNLT(As<UInt4>(src0.y), As<UInt4>(src1.y)));
				1824	dst.z = As<Float4>(CmpNLT(As<UInt4>(src0.z), As<UInt4>(src1.z)));
				1825	dst.w = As<Float4>(CmpNLT(As<UInt4>(src0.w), As<UInt4>(src1.w)));
				1826	break;
				1827	case Shader::CONTROL_LT:
				1828	dst.x = As<Float4>(CmpLT(As<UInt4>(src0.x), As<UInt4>(src1.x)));
				1829	dst.y = As<Float4>(CmpLT(As<UInt4>(src0.y), As<UInt4>(src1.y)));
				1830	dst.z = As<Float4>(CmpLT(As<UInt4>(src0.z), As<UInt4>(src1.z)));
				1831	dst.w = As<Float4>(CmpLT(As<UInt4>(src0.w), As<UInt4>(src1.w)));
				1832	break;
				1833	case Shader::CONTROL_NE:
				1834	dst.x = As<Float4>(CmpNEQ(As<UInt4>(src0.x), As<UInt4>(src1.x)));
				1835	dst.y = As<Float4>(CmpNEQ(As<UInt4>(src0.y), As<UInt4>(src1.y)));
				1836	dst.z = As<Float4>(CmpNEQ(As<UInt4>(src0.z), As<UInt4>(src1.z)));
				1837	dst.w = As<Float4>(CmpNEQ(As<UInt4>(src0.w), As<UInt4>(src1.w)));
				1838	break;
				1839	case Shader::CONTROL_LE:
				1840	dst.x = As<Float4>(CmpLE(As<UInt4>(src0.x), As<UInt4>(src1.x)));
				1841	dst.y = As<Float4>(CmpLE(As<UInt4>(src0.y), As<UInt4>(src1.y)));
				1842	dst.z = As<Float4>(CmpLE(As<UInt4>(src0.z), As<UInt4>(src1.z)));
				1843	dst.w = As<Float4>(CmpLE(As<UInt4>(src0.w), As<UInt4>(src1.w)));
				1844	break;
				1845	default:
				1846	ASSERT(false);
				1847	}
				1848	}
				1849
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1850	void ShaderCore::all(Float4 &dst, const Vector4f &src)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1851	{
				1852	dst = As<Float4>(As<Int4>(src.x) & As<Int4>(src.y) & As<Int4>(src.z) & As<Int4>(src.w));
				1853	}
				1854
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1855	void ShaderCore::any(Float4 &dst, const Vector4f &src)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1856	{
				1857	dst = As<Float4>(As<Int4>(src.x) \| As<Int4>(src.y) \| As<Int4>(src.z) \| As<Int4>(src.w));
				1858	}
				1859
Alexis Hetu	24f454e	2016-08-31 17:22:13 -0400	[diff] [blame]	1860	void ShaderCore::bitwise_not(Vector4f &dst, const Vector4f &src)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1861	{
				1862	dst.x = As<Float4>(As<Int4>(src.x) ^ Int4(0xFFFFFFFF));
				1863	dst.y = As<Float4>(As<Int4>(src.y) ^ Int4(0xFFFFFFFF));
				1864	dst.z = As<Float4>(As<Int4>(src.z) ^ Int4(0xFFFFFFFF));
				1865	dst.w = As<Float4>(As<Int4>(src.w) ^ Int4(0xFFFFFFFF));
				1866	}
				1867
Alexis Hetu	24f454e	2016-08-31 17:22:13 -0400	[diff] [blame]	1868	void ShaderCore::bitwise_or(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1869	{
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	1870	dst.x = As<Float4>(As<Int4>(src0.x) \| As<Int4>(src1.x));
				1871	dst.y = As<Float4>(As<Int4>(src0.y) \| As<Int4>(src1.y));
				1872	dst.z = As<Float4>(As<Int4>(src0.z) \| As<Int4>(src1.z));
				1873	dst.w = As<Float4>(As<Int4>(src0.w) \| As<Int4>(src1.w));
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1874	}
				1875
Alexis Hetu	24f454e	2016-08-31 17:22:13 -0400	[diff] [blame]	1876	void ShaderCore::bitwise_xor(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1877	{
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	1878	dst.x = As<Float4>(As<Int4>(src0.x) ^ As<Int4>(src1.x));
				1879	dst.y = As<Float4>(As<Int4>(src0.y) ^ As<Int4>(src1.y));
				1880	dst.z = As<Float4>(As<Int4>(src0.z) ^ As<Int4>(src1.z));
				1881	dst.w = As<Float4>(As<Int4>(src0.w) ^ As<Int4>(src1.w));
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1882	}
				1883
Alexis Hetu	24f454e	2016-08-31 17:22:13 -0400	[diff] [blame]	1884	void ShaderCore::bitwise_and(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1885	{
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	1886	dst.x = As<Float4>(As<Int4>(src0.x) & As<Int4>(src1.x));
				1887	dst.y = As<Float4>(As<Int4>(src0.y) & As<Int4>(src1.y));
				1888	dst.z = As<Float4>(As<Int4>(src0.z) & As<Int4>(src1.z));
				1889	dst.w = As<Float4>(As<Int4>(src0.w) & As<Int4>(src1.w));
				1890	}
				1891
				1892	void ShaderCore::equal(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
				1893	{
				1894	dst.x = As<Float4>(CmpEQ(As<UInt4>(src0.x), As<UInt4>(src1.x)) &
				1895	CmpEQ(As<UInt4>(src0.y), As<UInt4>(src1.y)) &
				1896	CmpEQ(As<UInt4>(src0.z), As<UInt4>(src1.z)) &
				1897	CmpEQ(As<UInt4>(src0.w), As<UInt4>(src1.w)));
				1898	dst.y = dst.x;
				1899	dst.z = dst.x;
				1900	dst.w = dst.x;
				1901	}
				1902
				1903	void ShaderCore::notEqual(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
				1904	{
				1905	dst.x = As<Float4>(CmpNEQ(As<UInt4>(src0.x), As<UInt4>(src1.x)) \|
				1906	CmpNEQ(As<UInt4>(src0.y), As<UInt4>(src1.y)) \|
				1907	CmpNEQ(As<UInt4>(src0.z), As<UInt4>(src1.z)) \|
				1908	CmpNEQ(As<UInt4>(src0.w), As<UInt4>(src1.w)));
				1909	dst.y = dst.x;
				1910	dst.z = dst.x;
				1911	dst.w = dst.x;
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1912	}
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1913	}