blob: d55e47c7f2c482c5e2fa23456551ed49de490a01 [file] [log] [blame]
// SwiftShader Software Renderer
//
// Copyright(c) 2005-2012 TransGaming Inc.
//
// All rights reserved. No part of this software may be copied, distributed, transmitted,
// transcribed, stored in a retrieval system, translated into any human or computer
// language by any means, or disclosed to third parties without the explicit written
// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
// or implied, including but not limited to any patent rights, are granted to you.
//
11
12#include "ShaderCore.hpp"
13
John Bauman19bac1e2014-05-06 15:23:49 -040014#include "Renderer/Renderer.hpp"
15#include "Common/Debug.hpp"
John Bauman89401822014-05-06 15:04:28 -040016
17namespace sw
18{
John Bauman19bac1e2014-05-06 15:23:49 -040019 extern TranscendentalPrecision logPrecision;
20 extern TranscendentalPrecision expPrecision;
21 extern TranscendentalPrecision rcpPrecision;
22 extern TranscendentalPrecision rsqPrecision;
23
Alexis Hetu96517182015-04-15 10:30:23 -040024 Vector4s::Vector4s()
John Bauman19bac1e2014-05-06 15:23:49 -040025 {
26 }
27
Alexis Hetu96517182015-04-15 10:30:23 -040028 Vector4s::Vector4s(unsigned short x, unsigned short y, unsigned short z, unsigned short w)
John Bauman19bac1e2014-05-06 15:23:49 -040029 {
30 this->x = Short4(x);
31 this->y = Short4(y);
32 this->z = Short4(z);
33 this->w = Short4(w);
34 }
35
Alexis Hetu96517182015-04-15 10:30:23 -040036 Vector4s::Vector4s(const Vector4s &rhs)
John Bauman19bac1e2014-05-06 15:23:49 -040037 {
38 x = rhs.x;
39 y = rhs.y;
40 z = rhs.z;
41 w = rhs.w;
42 }
43
Alexis Hetu96517182015-04-15 10:30:23 -040044 Vector4s &Vector4s::operator=(const Vector4s &rhs)
John Bauman19bac1e2014-05-06 15:23:49 -040045 {
46 x = rhs.x;
47 y = rhs.y;
48 z = rhs.z;
49 w = rhs.w;
50
51 return *this;
52 }
53
Alexis Hetu96517182015-04-15 10:30:23 -040054 Short4 &Vector4s::operator[](int i)
John Bauman19bac1e2014-05-06 15:23:49 -040055 {
56 switch(i)
57 {
58 case 0: return x;
59 case 1: return y;
60 case 2: return z;
61 case 3: return w;
62 }
63
64 return x;
65 }
66
67 Vector4f::Vector4f()
68 {
69 }
70
71 Vector4f::Vector4f(float x, float y, float z, float w)
72 {
73 this->x = Float4(x);
74 this->y = Float4(y);
75 this->z = Float4(z);
76 this->w = Float4(w);
77 }
78
79 Vector4f::Vector4f(const Vector4f &rhs)
80 {
81 x = rhs.x;
82 y = rhs.y;
83 z = rhs.z;
84 w = rhs.w;
85 }
86
87 Vector4f &Vector4f::operator=(const Vector4f &rhs)
88 {
89 x = rhs.x;
90 y = rhs.y;
91 z = rhs.z;
92 w = rhs.w;
93
94 return *this;
95 }
96
97 Float4 &Vector4f::operator[](int i)
98 {
99 switch(i)
100 {
101 case 0: return x;
102 case 1: return y;
103 case 2: return z;
104 case 3: return w;
105 }
106
107 return x;
108 }
109
110 Float4 exponential2(RValue<Float4> x, bool pp)
111 {
112 Float4 x0;
113 Float4 x1;
114 Int4 x2;
115
116 x0 = x;
117
118 x0 = Min(x0, As<Float4>(Int4(0x43010000))); // 129.00000e+0f
119 x0 = Max(x0, As<Float4>(Int4(0xC2FDFFFF))); // -126.99999e+0f
120 x1 = x0;
121 x1 -= Float4(0.5f);
122 x2 = RoundInt(x1);
123 x1 = Float4(x2);
124 x2 += Int4(0x0000007F); // 127
125 x2 = x2 << 23;
126 x0 -= x1;
127 x1 = As<Float4>(Int4(0x3AF61905)); // 1.8775767e-3f
128 x1 *= x0;
129 x1 += As<Float4>(Int4(0x3C134806)); // 8.9893397e-3f
130 x1 *= x0;
131 x1 += As<Float4>(Int4(0x3D64AA23)); // 5.5826318e-2f
132 x1 *= x0;
133 x1 += As<Float4>(Int4(0x3E75EAD4)); // 2.4015361e-1f
134 x1 *= x0;
135 x1 += As<Float4>(Int4(0x3F31727B)); // 6.9315308e-1f
136 x1 *= x0;
137 x1 += As<Float4>(Int4(0x3F7FFFFF)); // 9.9999994e-1f
138 x1 *= As<Float4>(x2);
139
140 return x1;
141 }
142
143 Float4 logarithm2(RValue<Float4> x, bool absolute, bool pp)
144 {
145 Float4 x0;
146 Float4 x1;
147 Float4 x2;
148 Float4 x3;
149
150 x0 = x;
151
152 x1 = As<Float4>(As<Int4>(x0) & Int4(0x7F800000));
153 x1 = As<Float4>(As<UInt4>(x1) >> 8);
154 x1 = As<Float4>(As<Int4>(x1) | As<Int4>(Float4(1.0f)));
155 x1 = (x1 - Float4(1.4960938f)) * Float4(256.0f); // FIXME: (x1 - 1.4960938f) * 256.0f;
156 x0 = As<Float4>((As<Int4>(x0) & Int4(0x007FFFFF)) | As<Int4>(Float4(1.0f)));
157
158 x2 = (Float4(9.5428179e-2f) * x0 + Float4(4.7779095e-1f)) * x0 + Float4(1.9782813e-1f);
159 x3 = ((Float4(1.6618466e-2f) * x0 + Float4(2.0350508e-1f)) * x0 + Float4(2.7382900e-1f)) * x0 + Float4(4.0496687e-2f);
160 x2 /= x3;
161
162 x1 += (x0 - Float4(1.0f)) * x2;
163
164 return x1;
165 }
166
167 Float4 exponential(RValue<Float4> x, bool pp)
168 {
169 // FIXME: Propagate the constant
170 return exponential2(Float4(1.44269541f) * x, pp); // 1/ln(2)
171 }
172
173 Float4 logarithm(RValue<Float4> x, bool absolute, bool pp)
174 {
175 // FIXME: Propagate the constant
176 return Float4(6.93147181e-1f) * logarithm2(x, absolute, pp); // ln(2)
177 }
178
179 Float4 power(RValue<Float4> x, RValue<Float4> y, bool pp)
180 {
181 Float4 log = logarithm2(x, true, pp);
182 log *= y;
183 return exponential2(log, pp);
184 }
185
186 Float4 reciprocal(RValue<Float4> x, bool pp, bool finite)
187 {
188 Float4 rcp;
189
190 if(!pp && rcpPrecision >= WHQL)
191 {
192 rcp = Float4(1.0f) / x;
193 }
194 else
195 {
196 rcp = Rcp_pp(x);
197
198 if(!pp)
199 {
200 rcp = (rcp + rcp) - (x * rcp * rcp);
201 }
202 }
203
204 if(finite)
205 {
206 int big = 0x7F7FFFFF;
207 rcp = Min(rcp, Float4((float&)big));
208 }
209
210 return rcp;
211 }
212
213 Float4 reciprocalSquareRoot(RValue<Float4> x, bool absolute, bool pp)
214 {
215 Float4 abs = x;
216
217 if(absolute)
218 {
219 abs = Abs(abs);
220 }
221
222 Float4 rsq;
223
224 if(!pp && rsqPrecision >= IEEE)
225 {
226 rsq = Float4(1.0f) / Sqrt(abs);
227 }
228 else
229 {
230 rsq = RcpSqrt_pp(abs);
231
232 if(!pp)
233 {
234 rsq = rsq * (Float4(3.0f) - rsq * rsq * abs) * Float4(0.5f);
235 }
236 }
237
238 int big = 0x7F7FFFFF;
239 rsq = Min(rsq, Float4((float&)big));
240
241 return rsq;
242 }
243
244 Float4 modulo(RValue<Float4> x, RValue<Float4> y)
245 {
246 return x - y * Floor(x / y);
247 }
248
249 Float4 sine_pi(RValue<Float4> x, bool pp)
250 {
251 const Float4 A = Float4(-4.05284734e-1f); // -4/pi^2
252 const Float4 B = Float4(1.27323954e+0f); // 4/pi
253 const Float4 C = Float4(7.75160950e-1f);
254 const Float4 D = Float4(2.24839049e-1f);
255
256 // Parabola approximating sine
257 Float4 sin = x * (Abs(x) * A + B);
258
259 // Improve precision from 0.06 to 0.001
260 if(true)
261 {
262 sin = sin * (Abs(sin) * D + C);
263 }
264
265 return sin;
266 }
267
268 Float4 cosine_pi(RValue<Float4> x, bool pp)
269 {
270 // cos(x) = sin(x + pi/2)
271 Float4 y = x + Float4(1.57079632e+0f);
272
273 // Wrap around
274 y -= As<Float4>(CmpNLT(y, Float4(3.14159265e+0f)) & As<Int4>(Float4(6.28318530e+0f)));
275
276 return sine_pi(y, pp);
277 }
278
279 Float4 sine(RValue<Float4> x, bool pp)
280 {
281 // Reduce to [-0.5, 0.5] range
282 Float4 y = x * Float4(1.59154943e-1f); // 1/2pi
283 y = y - Round(y);
284
285 const Float4 A = Float4(-16.0f);
286 const Float4 B = Float4(8.0f);
287 const Float4 C = Float4(7.75160950e-1f);
288 const Float4 D = Float4(2.24839049e-1f);
289
290 // Parabola approximating sine
291 Float4 sin = y * (Abs(y) * A + B);
292
293 // Improve precision from 0.06 to 0.001
294 if(true)
295 {
296 sin = sin * (Abs(sin) * D + C);
297 }
298
299 return sin;
300 }
301
302 Float4 cosine(RValue<Float4> x, bool pp)
303 {
304 // cos(x) = sin(x + pi/2)
305 Float4 y = x + Float4(1.57079632e+0f);
306 return sine(y, pp);
307 }
308
309 Float4 tangent(RValue<Float4> x, bool pp)
310 {
311 return sine(x, pp) / cosine(x, pp);
312 }
313
314 Float4 arccos(RValue<Float4> x, bool pp)
315 {
316 // pi/2 - arcsin(x)
317 return Float4(1.57079632e+0f) - arcsin(x);
318 }
319
320 Float4 arcsin(RValue<Float4> x, bool pp)
321 {
322 // x*(pi/2-sqrt(1-x*x)*pi/5)
323 return x * (Float4(1.57079632e+0f) - Sqrt(Float4(1.0f) - x*x) * Float4(6.28318531e-1f));
324 }
325
326 Float4 arctan(RValue<Float4> x, bool pp)
327 {
328 Int4 O = CmpNLT(Abs(x), Float4(1.0f));
329 Float4 y = As<Float4>(O & As<Int4>(Float4(1.0f) / x) | ~O & As<Int4>(x)); // FIXME: Vector select
330
331 // Approximation of atan in [-1..1]
332 Float4 theta = y * (Float4(-0.27f) * Abs(y) + Float4(1.05539816f));
333
334 // +/-pi/2 depending on sign of x
335 Float4 sgnPi_2 = As<Float4>(As<Int4>(Float4(1.57079632e+0f)) ^ (As<Int4>(x) & Int4(0x80000000)));
336
337 theta = As<Float4>(O & As<Int4>(sgnPi_2 - theta) | ~O & As<Int4>(theta)); // FIXME: Vector select
338
339 return theta;
340 }
341
342 Float4 arctan(RValue<Float4> y, RValue<Float4> x, bool pp)
343 {
344 // Rotate to upper semicircle when in lower semicircle
345 Int4 S = CmpLT(y, Float4(0.0f));
346 Float4 theta = As<Float4>(S & As<Int4>(Float4(-3.14159265e+0f))); // -pi
347 Float4 x0 = As<Float4>((As<Int4>(y) & Int4(0x80000000)) ^ As<Int4>(x));
348 Float4 y0 = Abs(y);
349
350 // Rotate to right quadrant when in left quadrant
351 Int4 Q = CmpLT(x0, Float4(0.0f));
352 theta += As<Float4>(Q & As<Int4>(Float4(1.57079632e+0f))); // pi/2
353 Float4 x1 = As<Float4>(Q & As<Int4>(y0) | ~Q & As<Int4>(x0)); // FIXME: Vector select
354 Float4 y1 = As<Float4>(Q & As<Int4>(-x0) | ~Q & As<Int4>(y0)); // FIXME: Vector select
355
356 // Rotate to first octant when in second octant
357 Int4 O = CmpNLT(y1, x1);
358 theta += As<Float4>(O & As<Int4>(Float4(7.85398163e-1f))); // pi/4
359 Float4 x2 = As<Float4>(O & As<Int4>(Float4(7.07106781e-1f) * x1 + Float4(7.07106781e-1f) * y1) | ~O & As<Int4>(x1)); // sqrt(2)/2 // FIXME: Vector select
360 Float4 y2 = As<Float4>(O & As<Int4>(Float4(7.07106781e-1f) * y1 - Float4(7.07106781e-1f) * x1) | ~O & As<Int4>(y1)); // FIXME: Vector select
361
362 // Approximation of atan in [0..1]
363 Float4 y_x = y2 / x2;
364 theta += y_x * (Float4(-0.27f) * y_x + Float4(1.05539816f));
365
366 return theta;
367 }
368
369 Float4 dot2(Vector4f &v0, Vector4f &v1)
370 {
371 return v0.x * v1.x + v0.y * v1.y;
372 }
373
374 Float4 dot3(Vector4f &v0, Vector4f &v1)
375 {
376 return v0.x * v1.x + v0.y * v1.y + v0.z * v1.z;
377 }
378
379 Float4 dot4(Vector4f &v0, Vector4f &v1)
380 {
381 return v0.x * v1.x + v0.y * v1.y + v0.z * v1.z + v0.w * v1.w;
382 }
383
384 void transpose4x4(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3)
385 {
386 Int2 tmp0 = UnpackHigh(row0, row1);
387 Int2 tmp1 = UnpackHigh(row2, row3);
388 Int2 tmp2 = UnpackLow(row0, row1);
389 Int2 tmp3 = UnpackLow(row2, row3);
390
391 row0 = As<Short4>(UnpackLow(tmp2, tmp3));
392 row1 = As<Short4>(UnpackHigh(tmp2, tmp3));
393 row2 = As<Short4>(UnpackLow(tmp0, tmp1));
394 row3 = As<Short4>(UnpackHigh(tmp0, tmp1));
395 }
396
397 void transpose4x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3)
398 {
399 Float4 tmp0 = UnpackLow(row0, row1);
400 Float4 tmp1 = UnpackLow(row2, row3);
401 Float4 tmp2 = UnpackHigh(row0, row1);
402 Float4 tmp3 = UnpackHigh(row2, row3);
403
404 row0 = Float4(tmp0.xy, tmp1.xy);
405 row1 = Float4(tmp0.zw, tmp1.zw);
406 row2 = Float4(tmp2.xy, tmp3.xy);
407 row3 = Float4(tmp2.zw, tmp3.zw);
408 }
409
410 void transpose4x3(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3)
411 {
412 Float4 tmp0 = UnpackLow(row0, row1);
413 Float4 tmp1 = UnpackLow(row2, row3);
414 Float4 tmp2 = UnpackHigh(row0, row1);
415 Float4 tmp3 = UnpackHigh(row2, row3);
416
417 row0 = Float4(tmp0.xy, tmp1.xy);
418 row1 = Float4(tmp0.zw, tmp1.zw);
419 row2 = Float4(tmp2.xy, tmp3.xy);
420 }
421
422 void transpose4x2(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3)
423 {
424 Float4 tmp0 = UnpackLow(row0, row1);
425 Float4 tmp1 = UnpackLow(row2, row3);
426
427 row0 = Float4(tmp0.xy, tmp1.xy);
428 row1 = Float4(tmp0.zw, tmp1.zw);
429 }
430
431 void transpose4x1(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3)
432 {
433 Float4 tmp0 = UnpackLow(row0, row1);
434 Float4 tmp1 = UnpackLow(row2, row3);
435
436 row0 = Float4(tmp0.xy, tmp1.xy);
437 }
438
439 void transpose2x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3)
440 {
441 row0 = UnpackLow(row0, row1);
442 row1 = Float4(row0.zw, row1.zw);
443 row2 = UnpackHigh(row0, row1);
444 row3 = Float4(row2.zw, row3.zw);
445 }
446
447 void transpose2x4h(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3)
448 {
449 row0 = UnpackLow(row2, row3);
450 row1 = Float4(row0.zw, row1.zw);
451 row2 = UnpackHigh(row2, row3);
452 row3 = Float4(row2.zw, row3.zw);
453 }
454
455 void transpose4xN(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3, int N)
456 {
457 switch(N)
458 {
459 case 1: transpose4x1(row0, row1, row2, row3); break;
460 case 2: transpose4x2(row0, row1, row2, row3); break;
461 case 3: transpose4x3(row0, row1, row2, row3); break;
462 case 4: transpose4x4(row0, row1, row2, row3); break;
463 }
464 }
465
466 void ShaderCore::mov(Vector4f &dst, Vector4f &src, bool floorToInteger)
John Bauman89401822014-05-06 15:04:28 -0400467 {
468 if(floorToInteger)
469 {
470 dst.x = Floor(src.x);
471 }
472 else
473 {
474 dst = src;
475 }
476 }
477
John Bauman19bac1e2014-05-06 15:23:49 -0400478 void ShaderCore::f2b(Vector4f &dst, Vector4f &src)
479 {
480 dst.x = As<Float4>(CmpNEQ(src.x, Float4(0.0f)));
481 dst.y = As<Float4>(CmpNEQ(src.y, Float4(0.0f)));
482 dst.z = As<Float4>(CmpNEQ(src.z, Float4(0.0f)));
483 dst.w = As<Float4>(CmpNEQ(src.w, Float4(0.0f)));
484 }
485
486 void ShaderCore::b2f(Vector4f &dst, Vector4f &src)
487 {
488 dst.x = As<Float4>(As<Int4>(src.x) & As<Int4>(Float4(1.0f)));
489 dst.y = As<Float4>(As<Int4>(src.y) & As<Int4>(Float4(1.0f)));
490 dst.z = As<Float4>(As<Int4>(src.z) & As<Int4>(Float4(1.0f)));
491 dst.w = As<Float4>(As<Int4>(src.w) & As<Int4>(Float4(1.0f)));
492 }
493
494 void ShaderCore::add(Vector4f &dst, Vector4f &src0, Vector4f &src1)
John Bauman89401822014-05-06 15:04:28 -0400495 {
496 dst.x = src0.x + src1.x;
497 dst.y = src0.y + src1.y;
498 dst.z = src0.z + src1.z;
499 dst.w = src0.w + src1.w;
500 }
501
John Bauman19bac1e2014-05-06 15:23:49 -0400502 void ShaderCore::sub(Vector4f &dst, Vector4f &src0, Vector4f &src1)
John Bauman89401822014-05-06 15:04:28 -0400503 {
504 dst.x = src0.x - src1.x;
505 dst.y = src0.y - src1.y;
506 dst.z = src0.z - src1.z;
507 dst.w = src0.w - src1.w;
508 }
509
John Bauman19bac1e2014-05-06 15:23:49 -0400510 void ShaderCore::mad(Vector4f &dst, Vector4f &src0, Vector4f &src1, Vector4f &src2)
John Bauman89401822014-05-06 15:04:28 -0400511 {
512 dst.x = src0.x * src1.x + src2.x;
513 dst.y = src0.y * src1.y + src2.y;
514 dst.z = src0.z * src1.z + src2.z;
515 dst.w = src0.w * src1.w + src2.w;
516 }
517
John Bauman19bac1e2014-05-06 15:23:49 -0400518 void ShaderCore::mul(Vector4f &dst, Vector4f &src0, Vector4f &src1)
John Bauman89401822014-05-06 15:04:28 -0400519 {
520 dst.x = src0.x * src1.x;
521 dst.y = src0.y * src1.y;
522 dst.z = src0.z * src1.z;
523 dst.w = src0.w * src1.w;
524 }
525
John Bauman19bac1e2014-05-06 15:23:49 -0400526 void ShaderCore::rcpx(Vector4f &dst, Vector4f &src, bool pp)
John Bauman89401822014-05-06 15:04:28 -0400527 {
528 Float4 rcp = reciprocal(src.x, pp, true);
529
530 dst.x = rcp;
531 dst.y = rcp;
532 dst.z = rcp;
533 dst.w = rcp;
534 }
535
John Bauman19bac1e2014-05-06 15:23:49 -0400536 void ShaderCore::div(Vector4f &dst, Vector4f &src0, Vector4f &src1)
537 {
538 dst.x = src0.x / src1.x;
539 dst.y = src0.y / src1.y;
540 dst.z = src0.z / src1.z;
541 dst.w = src0.w / src1.w;
542 }
543
544 void ShaderCore::mod(Vector4f &dst, Vector4f &src0, Vector4f &src1)
545 {
546 dst.x = modulo(src0.x, src1.x);
547 dst.y = modulo(src0.y, src1.y);
548 dst.z = modulo(src0.z, src1.z);
549 dst.w = modulo(src0.w, src1.w);
550 }
551
552 void ShaderCore::rsqx(Vector4f &dst, Vector4f &src, bool pp)
John Bauman89401822014-05-06 15:04:28 -0400553 {
554 Float4 rsq = reciprocalSquareRoot(src.x, true, pp);
555
John Bauman19bac1e2014-05-06 15:23:49 -0400556 dst.x = rsq;
557 dst.y = rsq;
558 dst.z = rsq;
559 dst.w = rsq;
John Bauman89401822014-05-06 15:04:28 -0400560 }
561
John Bauman19bac1e2014-05-06 15:23:49 -0400562 void ShaderCore::sqrt(Vector4f &dst, Vector4f &src, bool pp)
563 {
564 dst.x = Sqrt(src.x);
565 dst.y = Sqrt(src.y);
566 dst.z = Sqrt(src.z);
567 dst.w = Sqrt(src.w);
568 }
569
570 void ShaderCore::rsq(Vector4f &dst, Vector4f &src, bool pp)
571 {
572 dst.x = reciprocalSquareRoot(src.x, false, pp);
573 dst.y = reciprocalSquareRoot(src.y, false, pp);
574 dst.z = reciprocalSquareRoot(src.z, false, pp);
575 dst.w = reciprocalSquareRoot(src.w, false, pp);
576 }
577
578 void ShaderCore::len2(Float4 &dst, Vector4f &src, bool pp)
579 {
580 dst = Sqrt(dot2(src, src));
581 }
582
583 void ShaderCore::len3(Float4 &dst, Vector4f &src, bool pp)
584 {
585 dst = Sqrt(dot3(src, src));
586 }
587
588 void ShaderCore::len4(Float4 &dst, Vector4f &src, bool pp)
589 {
590 dst = Sqrt(dot4(src, src));
591 }
592
593 void ShaderCore::dist1(Float4 &dst, Vector4f &src0, Vector4f &src1, bool pp)
594 {
595 dst = Abs(src0.x - src1.x);
596 }
597
598 void ShaderCore::dist2(Float4 &dst, Vector4f &src0, Vector4f &src1, bool pp)
599 {
600 Float4 dx = src0.x - src1.x;
601 Float4 dy = src0.y - src1.y;
602 Float4 dot2 = dx * dx + dy * dy;
603 dst = Sqrt(dot2);
604 }
605
606 void ShaderCore::dist3(Float4 &dst, Vector4f &src0, Vector4f &src1, bool pp)
607 {
608 Float4 dx = src0.x - src1.x;
609 Float4 dy = src0.y - src1.y;
610 Float4 dz = src0.z - src1.z;
611 Float4 dot3 = dx * dx + dy * dy + dz * dz;
612 dst = Sqrt(dot3);
613 }
614
615 void ShaderCore::dist4(Float4 &dst, Vector4f &src0, Vector4f &src1, bool pp)
616 {
617 Float4 dx = src0.x - src1.x;
618 Float4 dy = src0.y - src1.y;
619 Float4 dz = src0.z - src1.z;
620 Float4 dw = src0.w - src1.w;
621 Float4 dot4 = dx * dx + dy * dy + dz * dz + dw * dw;
622 dst = Sqrt(dot4);
623 }
624
625 void ShaderCore::dp1(Vector4f &dst, Vector4f &src0, Vector4f &src1)
626 {
627 Float4 t = src0.x * src1.x;
628
629 dst.x = t;
630 dst.y = t;
631 dst.z = t;
632 dst.w = t;
633 }
634
635 void ShaderCore::dp2(Vector4f &dst, Vector4f &src0, Vector4f &src1)
636 {
637 Float4 t = dot2(src0, src1);
638
639 dst.x = t;
640 dst.y = t;
641 dst.z = t;
642 dst.w = t;
643 }
644
645 void ShaderCore::dp2add(Vector4f &dst, Vector4f &src0, Vector4f &src1, Vector4f &src2)
646 {
647 Float4 t = dot2(src0, src1) + src2.x;
648
649 dst.x = t;
650 dst.y = t;
651 dst.z = t;
652 dst.w = t;
653 }
654
655 void ShaderCore::dp3(Vector4f &dst, Vector4f &src0, Vector4f &src1)
John Bauman89401822014-05-06 15:04:28 -0400656 {
657 Float4 dot = dot3(src0, src1);
658
659 dst.x = dot;
660 dst.y = dot;
661 dst.z = dot;
662 dst.w = dot;
663 }
664
John Bauman19bac1e2014-05-06 15:23:49 -0400665 void ShaderCore::dp4(Vector4f &dst, Vector4f &src0, Vector4f &src1)
John Bauman89401822014-05-06 15:04:28 -0400666 {
667 Float4 dot = dot4(src0, src1);
668
669 dst.x = dot;
670 dst.y = dot;
671 dst.z = dot;
672 dst.w = dot;
673 }
674
John Bauman19bac1e2014-05-06 15:23:49 -0400675 void ShaderCore::min(Vector4f &dst, Vector4f &src0, Vector4f &src1)
John Bauman89401822014-05-06 15:04:28 -0400676 {
677 dst.x = Min(src0.x, src1.x);
678 dst.y = Min(src0.y, src1.y);
679 dst.z = Min(src0.z, src1.z);
680 dst.w = Min(src0.w, src1.w);
681 }
682
John Bauman19bac1e2014-05-06 15:23:49 -0400683 void ShaderCore::max(Vector4f &dst, Vector4f &src0, Vector4f &src1)
John Bauman89401822014-05-06 15:04:28 -0400684 {
685 dst.x = Max(src0.x, src1.x);
686 dst.y = Max(src0.y, src1.y);
687 dst.z = Max(src0.z, src1.z);
688 dst.w = Max(src0.w, src1.w);
689 }
690
John Bauman19bac1e2014-05-06 15:23:49 -0400691 void ShaderCore::slt(Vector4f &dst, Vector4f &src0, Vector4f &src1)
John Bauman89401822014-05-06 15:04:28 -0400692 {
John Bauman19bac1e2014-05-06 15:23:49 -0400693 dst.x = As<Float4>(As<Int4>(CmpLT(src0.x, src1.x)) & As<Int4>(Float4(1.0f)));
694 dst.y = As<Float4>(As<Int4>(CmpLT(src0.y, src1.y)) & As<Int4>(Float4(1.0f)));
695 dst.z = As<Float4>(As<Int4>(CmpLT(src0.z, src1.z)) & As<Int4>(Float4(1.0f)));
696 dst.w = As<Float4>(As<Int4>(CmpLT(src0.w, src1.w)) & As<Int4>(Float4(1.0f)));
John Bauman89401822014-05-06 15:04:28 -0400697 }
698
John Bauman19bac1e2014-05-06 15:23:49 -0400699 void ShaderCore::step(Vector4f &dst, Vector4f &edge, Vector4f &x)
John Bauman89401822014-05-06 15:04:28 -0400700 {
John Bauman19bac1e2014-05-06 15:23:49 -0400701 dst.x = As<Float4>(CmpNLT(x.x, edge.x) & As<Int4>(Float4(1.0f)));
702 dst.y = As<Float4>(CmpNLT(x.y, edge.y) & As<Int4>(Float4(1.0f)));
703 dst.z = As<Float4>(CmpNLT(x.z, edge.z) & As<Int4>(Float4(1.0f)));
704 dst.w = As<Float4>(CmpNLT(x.w, edge.w) & As<Int4>(Float4(1.0f)));
John Bauman89401822014-05-06 15:04:28 -0400705 }
706
John Bauman19bac1e2014-05-06 15:23:49 -0400707 void ShaderCore::exp2x(Vector4f &dst, Vector4f &src, bool pp)
John Bauman89401822014-05-06 15:04:28 -0400708 {
John Bauman19bac1e2014-05-06 15:23:49 -0400709 Float4 exp = exponential2(src.x, pp);
John Bauman89401822014-05-06 15:04:28 -0400710
711 dst.x = exp;
712 dst.y = exp;
713 dst.z = exp;
714 dst.w = exp;
715 }
716
John Bauman19bac1e2014-05-06 15:23:49 -0400717 void ShaderCore::exp2(Vector4f &dst, Vector4f &src, bool pp)
John Bauman89401822014-05-06 15:04:28 -0400718 {
John Bauman19bac1e2014-05-06 15:23:49 -0400719 dst.x = exponential2(src.x, pp);
720 dst.y = exponential2(src.y, pp);
721 dst.z = exponential2(src.z, pp);
722 dst.w = exponential2(src.w, pp);
723 }
724
725 void ShaderCore::exp(Vector4f &dst, Vector4f &src, bool pp)
726 {
727 dst.x = exponential(src.x, pp);
728 dst.y = exponential(src.y, pp);
729 dst.z = exponential(src.z, pp);
730 dst.w = exponential(src.w, pp);
731 }
732
733 void ShaderCore::log2x(Vector4f &dst, Vector4f &src, bool pp)
734 {
735 Float4 log = logarithm2(src.x, true, pp);
John Bauman89401822014-05-06 15:04:28 -0400736
737 dst.x = log;
738 dst.y = log;
739 dst.z = log;
740 dst.w = log;
741 }
742
John Bauman19bac1e2014-05-06 15:23:49 -0400743 void ShaderCore::log2(Vector4f &dst, Vector4f &src, bool pp)
John Bauman89401822014-05-06 15:04:28 -0400744 {
John Bauman19bac1e2014-05-06 15:23:49 -0400745 dst.x = logarithm2(src.x, pp);
746 dst.y = logarithm2(src.y, pp);
747 dst.z = logarithm2(src.z, pp);
748 dst.w = logarithm2(src.w, pp);
749 }
750
751 void ShaderCore::log(Vector4f &dst, Vector4f &src, bool pp)
752 {
753 dst.x = logarithm(src.x, false, pp);
754 dst.y = logarithm(src.y, false, pp);
755 dst.z = logarithm(src.z, false, pp);
756 dst.w = logarithm(src.w, false, pp);
757 }
758
759 void ShaderCore::lit(Vector4f &dst, Vector4f &src)
760 {
761 dst.x = Float4(1.0f);
762 dst.y = Max(src.x, Float4(0.0f));
John Bauman89401822014-05-06 15:04:28 -0400763
764 Float4 pow;
765
766 pow = src.w;
John Bauman19bac1e2014-05-06 15:23:49 -0400767 pow = Min(pow, Float4(127.9961f));
768 pow = Max(pow, Float4(-127.9961f));
John Bauman89401822014-05-06 15:04:28 -0400769
770 dst.z = power(src.y, pow);
John Bauman19bac1e2014-05-06 15:23:49 -0400771 dst.z = As<Float4>(As<Int4>(dst.z) & CmpNLT(src.x, Float4(0.0f)));
772 dst.z = As<Float4>(As<Int4>(dst.z) & CmpNLT(src.y, Float4(0.0f)));
John Bauman89401822014-05-06 15:04:28 -0400773
John Bauman19bac1e2014-05-06 15:23:49 -0400774 dst.w = Float4(1.0f);
John Bauman89401822014-05-06 15:04:28 -0400775 }
776
John Bauman19bac1e2014-05-06 15:23:49 -0400777 void ShaderCore::att(Vector4f &dst, Vector4f &src0, Vector4f &src1)
John Bauman89401822014-05-06 15:04:28 -0400778 {
John Bauman19bac1e2014-05-06 15:23:49 -0400779 // Computes attenuation factors (1, d, d^2, 1/d) assuming src0 = d^2, src1 = 1/d
John Bauman89401822014-05-06 15:04:28 -0400780 dst.x = 1;
781 dst.y = src0.y * src1.y;
782 dst.z = src0.z;
783 dst.w = src1.w;
784 }
785
John Bauman19bac1e2014-05-06 15:23:49 -0400786 void ShaderCore::lrp(Vector4f &dst, Vector4f &src0, Vector4f &src1, Vector4f &src2)
John Bauman89401822014-05-06 15:04:28 -0400787 {
788 dst.x = src0.x * (src1.x - src2.x) + src2.x;
789 dst.y = src0.y * (src1.y - src2.y) + src2.y;
790 dst.z = src0.z * (src1.z - src2.z) + src2.z;
791 dst.w = src0.w * (src1.w - src2.w) + src2.w;
792 }
793
John Bauman19bac1e2014-05-06 15:23:49 -0400794 void ShaderCore::smooth(Vector4f &dst, Vector4f &edge0, Vector4f &edge1, Vector4f &x)
John Bauman89401822014-05-06 15:04:28 -0400795 {
John Bauman19bac1e2014-05-06 15:23:49 -0400796 Float4 tx = Min(Max((x.x - edge0.x) / (edge1.x - edge0.x), Float4(0.0f)), Float4(1.0f)); dst.x = tx * tx * (Float4(3.0f) - Float4(2.0f) * tx);
797 Float4 ty = Min(Max((x.y - edge0.y) / (edge1.y - edge0.y), Float4(0.0f)), Float4(1.0f)); dst.y = ty * ty * (Float4(3.0f) - Float4(2.0f) * ty);
798 Float4 tz = Min(Max((x.z - edge0.z) / (edge1.z - edge0.z), Float4(0.0f)), Float4(1.0f)); dst.z = tz * tz * (Float4(3.0f) - Float4(2.0f) * tz);
799 Float4 tw = Min(Max((x.w - edge0.w) / (edge1.w - edge0.w), Float4(0.0f)), Float4(1.0f)); dst.w = tw * tw * (Float4(3.0f) - Float4(2.0f) * tw);
John Bauman89401822014-05-06 15:04:28 -0400800 }
801
John Bauman19bac1e2014-05-06 15:23:49 -0400802 void ShaderCore::frc(Vector4f &dst, Vector4f &src)
803 {
804 dst.x = Frac(src.x);
805 dst.y = Frac(src.y);
806 dst.z = Frac(src.z);
807 dst.w = Frac(src.w);
808 }
809
810 void ShaderCore::trunc(Vector4f &dst, Vector4f &src)
811 {
812 dst.x = Trunc(src.x);
813 dst.y = Trunc(src.y);
814 dst.z = Trunc(src.z);
815 dst.w = Trunc(src.w);
816 }
817
818 void ShaderCore::floor(Vector4f &dst, Vector4f &src)
819 {
820 dst.x = Floor(src.x);
821 dst.y = Floor(src.y);
822 dst.z = Floor(src.z);
823 dst.w = Floor(src.w);
824 }
825
826 void ShaderCore::ceil(Vector4f &dst, Vector4f &src)
827 {
828 dst.x = Ceil(src.x);
829 dst.y = Ceil(src.y);
830 dst.z = Ceil(src.z);
831 dst.w = Ceil(src.w);
832 }
833
834 void ShaderCore::powx(Vector4f &dst, Vector4f &src0, Vector4f &src1, bool pp)
John Bauman89401822014-05-06 15:04:28 -0400835 {
836 Float4 pow = power(src0.x, src1.x, pp);
837
838 dst.x = pow;
839 dst.y = pow;
840 dst.z = pow;
841 dst.w = pow;
842 }
843
John Bauman19bac1e2014-05-06 15:23:49 -0400844 void ShaderCore::pow(Vector4f &dst, Vector4f &src0, Vector4f &src1, bool pp)
845 {
846 dst.x = power(src0.x, src1.x, pp);
847 dst.y = power(src0.y, src1.y, pp);
848 dst.z = power(src0.z, src1.z, pp);
849 dst.w = power(src0.w, src1.w, pp);
850 }
851
852 void ShaderCore::crs(Vector4f &dst, Vector4f &src0, Vector4f &src1)
John Bauman89401822014-05-06 15:04:28 -0400853 {
854 dst.x = src0.y * src1.z - src0.z * src1.y;
855 dst.y = src0.z * src1.x - src0.x * src1.z;
856 dst.z = src0.x * src1.y - src0.y * src1.x;
857 }
858
John Bauman19bac1e2014-05-06 15:23:49 -0400859 void ShaderCore::forward1(Vector4f &dst, Vector4f &N, Vector4f &I, Vector4f &Nref)
860 {
861 Int4 flip = CmpNLT(Nref.x * I.x, Float4(0.0f)) & Int4(0x80000000);
862
863 dst.x = As<Float4>(flip ^ As<Int4>(N.x));
864 }
865
866 void ShaderCore::forward2(Vector4f &dst, Vector4f &N, Vector4f &I, Vector4f &Nref)
867 {
868 Int4 flip = CmpNLT(dot2(Nref, I), Float4(0.0f)) & Int4(0x80000000);
869
870 dst.x = As<Float4>(flip ^ As<Int4>(N.x));
871 dst.y = As<Float4>(flip ^ As<Int4>(N.y));
872 }
873
874 void ShaderCore::forward3(Vector4f &dst, Vector4f &N, Vector4f &I, Vector4f &Nref)
875 {
876 Int4 flip = CmpNLT(dot3(Nref, I), Float4(0.0f)) & Int4(0x80000000);
877
878 dst.x = As<Float4>(flip ^ As<Int4>(N.x));
879 dst.y = As<Float4>(flip ^ As<Int4>(N.y));
880 dst.z = As<Float4>(flip ^ As<Int4>(N.z));
881 }
882
883 void ShaderCore::forward4(Vector4f &dst, Vector4f &N, Vector4f &I, Vector4f &Nref)
884 {
885 Int4 flip = CmpNLT(dot4(Nref, I), Float4(0.0f)) & Int4(0x80000000);
886
887 dst.x = As<Float4>(flip ^ As<Int4>(N.x));
888 dst.y = As<Float4>(flip ^ As<Int4>(N.y));
889 dst.z = As<Float4>(flip ^ As<Int4>(N.z));
890 dst.w = As<Float4>(flip ^ As<Int4>(N.w));
891 }
892
893 void ShaderCore::reflect1(Vector4f &dst, Vector4f &I, Vector4f &N)
894 {
895 Float4 d = N.x * I.x;
896
897 dst.x = I.x - Float4(2.0f) * d * N.x;
898 }
899
900 void ShaderCore::reflect2(Vector4f &dst, Vector4f &I, Vector4f &N)
901 {
902 Float4 d = dot2(N, I);
903
904 dst.x = I.x - Float4(2.0f) * d * N.x;
905 dst.y = I.y - Float4(2.0f) * d * N.y;
906 }
907
908 void ShaderCore::reflect3(Vector4f &dst, Vector4f &I, Vector4f &N)
909 {
910 Float4 d = dot3(N, I);
911
912 dst.x = I.x - Float4(2.0f) * d * N.x;
913 dst.y = I.y - Float4(2.0f) * d * N.y;
914 dst.z = I.z - Float4(2.0f) * d * N.z;
915 }
916
917 void ShaderCore::reflect4(Vector4f &dst, Vector4f &I, Vector4f &N)
918 {
919 Float4 d = dot4(N, I);
920
921 dst.x = I.x - Float4(2.0f) * d * N.x;
922 dst.y = I.y - Float4(2.0f) * d * N.y;
923 dst.z = I.z - Float4(2.0f) * d * N.z;
924 dst.w = I.w - Float4(2.0f) * d * N.w;
925 }
926
927 void ShaderCore::refract1(Vector4f &dst, Vector4f &I, Vector4f &N, Float4 &eta)
928 {
929 Float4 d = N.x * I.x;
930 Float4 k = Float4(1.0f) - eta * eta * (Float4(1.0f) - d * d);
931 Int4 pos = CmpNLT(k, Float4(0.0f));
932 Float4 t = (eta * d + Sqrt(k));
933
934 dst.x = As<Float4>(pos & As<Int4>(eta * I.x - t * N.x));
935 }
936
937 void ShaderCore::refract2(Vector4f &dst, Vector4f &I, Vector4f &N, Float4 &eta)
938 {
939 Float4 d = dot2(N, I);
940 Float4 k = Float4(1.0f) - eta * eta * (Float4(1.0f) - d * d);
941 Int4 pos = CmpNLT(k, Float4(0.0f));
942 Float4 t = (eta * d + Sqrt(k));
943
944 dst.x = As<Float4>(pos & As<Int4>(eta * I.x - t * N.x));
945 dst.y = As<Float4>(pos & As<Int4>(eta * I.y - t * N.y));
946 }
947
948 void ShaderCore::refract3(Vector4f &dst, Vector4f &I, Vector4f &N, Float4 &eta)
949 {
950 Float4 d = dot3(N, I);
951 Float4 k = Float4(1.0f) - eta * eta * (Float4(1.0f) - d * d);
952 Int4 pos = CmpNLT(k, Float4(0.0f));
953 Float4 t = (eta * d + Sqrt(k));
954
955 dst.x = As<Float4>(pos & As<Int4>(eta * I.x - t * N.x));
956 dst.y = As<Float4>(pos & As<Int4>(eta * I.y - t * N.y));
957 dst.z = As<Float4>(pos & As<Int4>(eta * I.z - t * N.z));
958 }
959
960 void ShaderCore::refract4(Vector4f &dst, Vector4f &I, Vector4f &N, Float4 &eta)
961 {
962 Float4 d = dot4(N, I);
963 Float4 k = Float4(1.0f) - eta * eta * (Float4(1.0f) - d * d);
964 Int4 pos = CmpNLT(k, Float4(0.0f));
965 Float4 t = (eta * d + Sqrt(k));
966
967 dst.x = As<Float4>(pos & As<Int4>(eta * I.x - t * N.x));
968 dst.y = As<Float4>(pos & As<Int4>(eta * I.y - t * N.y));
969 dst.z = As<Float4>(pos & As<Int4>(eta * I.z - t * N.z));
970 dst.w = As<Float4>(pos & As<Int4>(eta * I.w - t * N.w));
971 }
972
973 void ShaderCore::sgn(Vector4f &dst, Vector4f &src)
John Bauman89401822014-05-06 15:04:28 -0400974 {
975 sgn(dst.x, src.x);
976 sgn(dst.y, src.y);
977 sgn(dst.z, src.z);
978 sgn(dst.w, src.w);
979 }
980
John Bauman19bac1e2014-05-06 15:23:49 -0400981 void ShaderCore::abs(Vector4f &dst, Vector4f &src)
John Bauman89401822014-05-06 15:04:28 -0400982 {
983 dst.x = Abs(src.x);
984 dst.y = Abs(src.y);
985 dst.z = Abs(src.z);
986 dst.w = Abs(src.w);
987 }
John Bauman19bac1e2014-05-06 15:23:49 -0400988
989 void ShaderCore::nrm2(Vector4f &dst, Vector4f &src, bool pp)
990 {
991 Float4 dot = dot2(src, src);
992 Float4 rsq = reciprocalSquareRoot(dot, false, pp);
John Bauman89401822014-05-06 15:04:28 -0400993
John Bauman19bac1e2014-05-06 15:23:49 -0400994 dst.x = src.x * rsq;
995 dst.y = src.y * rsq;
996 dst.z = src.z * rsq;
997 dst.w = src.w * rsq;
998 }
999
1000 void ShaderCore::nrm3(Vector4f &dst, Vector4f &src, bool pp)
John Bauman89401822014-05-06 15:04:28 -04001001 {
1002 Float4 dot = dot3(src, src);
1003 Float4 rsq = reciprocalSquareRoot(dot, false, pp);
1004
1005 dst.x = src.x * rsq;
1006 dst.y = src.y * rsq;
1007 dst.z = src.z * rsq;
1008 dst.w = src.w * rsq;
1009 }
John Bauman19bac1e2014-05-06 15:23:49 -04001010
1011 void ShaderCore::nrm4(Vector4f &dst, Vector4f &src, bool pp)
John Bauman89401822014-05-06 15:04:28 -04001012 {
John Bauman19bac1e2014-05-06 15:23:49 -04001013 Float4 dot = dot4(src, src);
1014 Float4 rsq = reciprocalSquareRoot(dot, false, pp);
John Bauman89401822014-05-06 15:04:28 -04001015
John Bauman19bac1e2014-05-06 15:23:49 -04001016 dst.x = src.x * rsq;
1017 dst.y = src.y * rsq;
1018 dst.z = src.z * rsq;
1019 dst.w = src.w * rsq;
1020 }
1021
1022 void ShaderCore::sincos(Vector4f &dst, Vector4f &src, bool pp)
1023 {
1024 dst.x = cosine_pi(src.x, pp);
1025 dst.y = sine_pi(src.x, pp);
John Bauman89401822014-05-06 15:04:28 -04001026 }
1027
John Bauman19bac1e2014-05-06 15:23:49 -04001028 void ShaderCore::cos(Vector4f &dst, Vector4f &src, bool pp)
1029 {
1030 dst.x = cosine(src.x, pp);
1031 dst.y = cosine(src.y, pp);
1032 dst.z = cosine(src.z, pp);
1033 dst.w = cosine(src.w, pp);
1034 }
1035
1036 void ShaderCore::sin(Vector4f &dst, Vector4f &src, bool pp)
1037 {
1038 dst.x = sine(src.x, pp);
1039 dst.y = sine(src.y, pp);
1040 dst.z = sine(src.z, pp);
1041 dst.w = sine(src.w, pp);
1042 }
1043
1044 void ShaderCore::tan(Vector4f &dst, Vector4f &src, bool pp)
1045 {
1046 dst.x = tangent(src.x, pp);
1047 dst.y = tangent(src.y, pp);
1048 dst.z = tangent(src.z, pp);
1049 dst.w = tangent(src.w, pp);
1050 }
1051
1052 void ShaderCore::acos(Vector4f &dst, Vector4f &src, bool pp)
1053 {
1054 dst.x = arccos(src.x, pp);
1055 dst.y = arccos(src.y, pp);
1056 dst.z = arccos(src.z, pp);
1057 dst.w = arccos(src.w, pp);
1058 }
1059
1060 void ShaderCore::asin(Vector4f &dst, Vector4f &src, bool pp)
1061 {
1062 dst.x = arcsin(src.x, pp);
1063 dst.y = arcsin(src.y, pp);
1064 dst.z = arcsin(src.z, pp);
1065 dst.w = arcsin(src.w, pp);
1066 }
1067
1068 void ShaderCore::atan(Vector4f &dst, Vector4f &src, bool pp)
1069 {
1070 dst.x = arctan(src.x, pp);
1071 dst.y = arctan(src.y, pp);
1072 dst.z = arctan(src.z, pp);
1073 dst.w = arctan(src.w, pp);
1074 }
1075
1076 void ShaderCore::atan2(Vector4f &dst, Vector4f &src0, Vector4f &src1, bool pp)
1077 {
1078 dst.x = arctan(src0.x, src1.x, pp);
1079 dst.y = arctan(src0.y, src1.y, pp);
1080 dst.z = arctan(src0.z, src1.z, pp);
1081 dst.w = arctan(src0.w, src1.w, pp);
1082 }
1083
1084 void ShaderCore::expp(Vector4f &dst, Vector4f &src, unsigned short version)
John Bauman89401822014-05-06 15:04:28 -04001085 {
1086 if(version < 0x0200)
1087 {
John Bauman19bac1e2014-05-06 15:23:49 -04001088 Float4 frc = Frac(src.x);
John Bauman89401822014-05-06 15:04:28 -04001089 Float4 floor = src.x - frc;
1090
John Bauman19bac1e2014-05-06 15:23:49 -04001091 dst.x = exponential2(floor, true);
John Bauman89401822014-05-06 15:04:28 -04001092 dst.y = frc;
John Bauman19bac1e2014-05-06 15:23:49 -04001093 dst.z = exponential2(src.x, true);
1094 dst.w = Float4(1.0f);
John Bauman89401822014-05-06 15:04:28 -04001095 }
1096 else // Version >= 2.0
1097 {
John Bauman19bac1e2014-05-06 15:23:49 -04001098 exp2x(dst, src, true); // FIXME: 10-bit precision suffices
John Bauman89401822014-05-06 15:04:28 -04001099 }
1100 }
1101
John Bauman19bac1e2014-05-06 15:23:49 -04001102 void ShaderCore::logp(Vector4f &dst, Vector4f &src, unsigned short version)
John Bauman89401822014-05-06 15:04:28 -04001103 {
1104 if(version < 0x0200)
1105 {
1106 Float4 tmp0;
1107 Float4 tmp1;
1108 Float4 t;
1109 Int4 r;
1110
1111 tmp0 = Abs(src.x);
1112 tmp1 = tmp0;
1113
1114 // X component
John Bauman19bac1e2014-05-06 15:23:49 -04001115 r = As<Int4>(As<UInt4>(tmp0) >> 23) - Int4(127);
John Bauman89401822014-05-06 15:04:28 -04001116 dst.x = Float4(r);
1117
1118 // Y component
1119 dst.y = As<Float4>((As<Int4>(tmp1) & Int4(0x007FFFFF)) | As<Int4>(Float4(1.0f)));
1120
1121 // Z component
John Bauman19bac1e2014-05-06 15:23:49 -04001122 dst.z = logarithm2(src.x, true, true);
John Bauman89401822014-05-06 15:04:28 -04001123
1124 // W component
1125 dst.w = 1.0f;
1126 }
1127 else
1128 {
John Bauman19bac1e2014-05-06 15:23:49 -04001129 log2x(dst, src, true);
John Bauman89401822014-05-06 15:04:28 -04001130 }
1131 }
1132
John Bauman19bac1e2014-05-06 15:23:49 -04001133 void ShaderCore::cmp0(Vector4f &dst, Vector4f &src0, Vector4f &src1, Vector4f &src2)
John Bauman89401822014-05-06 15:04:28 -04001134 {
John Bauman19bac1e2014-05-06 15:23:49 -04001135 cmp0(dst.x, src0.x, src1.x, src2.x);
1136 cmp0(dst.y, src0.y, src1.y, src2.y);
1137 cmp0(dst.z, src0.z, src1.z, src2.z);
1138 cmp0(dst.w, src0.w, src1.w, src2.w);
John Bauman89401822014-05-06 15:04:28 -04001139 }
John Bauman89401822014-05-06 15:04:28 -04001140
John Bauman19bac1e2014-05-06 15:23:49 -04001141 void ShaderCore::select(Vector4f &dst, Vector4f &src0, Vector4f &src1, Vector4f &src2)
1142 {
1143 select(dst.x, As<Int4>(src0.x), src1.x, src2.x);
1144 select(dst.y, As<Int4>(src0.y), src1.y, src2.y);
1145 select(dst.z, As<Int4>(src0.z), src1.z, src2.z);
1146 select(dst.w, As<Int4>(src0.w), src1.w, src2.w);
1147 }
1148
1149 void ShaderCore::extract(Float4 &dst, Vector4f &src0, Float4 &src1)
1150 {
1151 select(dst, CmpEQ(src1, Float4(1.0f)), src0.y, src0.x);
1152 select(dst, CmpEQ(src1, Float4(2.0f)), src0.z, dst);
1153 select(dst, CmpEQ(src1, Float4(3.0f)), src0.w, dst);
1154 }
1155
1156 void ShaderCore::insert(Vector4f &dst, Vector4f &src, Float4 &element, Float4 &index)
1157 {
1158 select(dst.x, CmpEQ(index, Float4(0.0f)), element, src.x);
1159 select(dst.y, CmpEQ(index, Float4(1.0f)), element, src.y);
1160 select(dst.z, CmpEQ(index, Float4(2.0f)), element, src.z);
1161 select(dst.w, CmpEQ(index, Float4(3.0f)), element, src.w);
John Bauman89401822014-05-06 15:04:28 -04001162 }
1163
1164 void ShaderCore::sgn(Float4 &dst, Float4 &src)
1165 {
John Bauman19bac1e2014-05-06 15:23:49 -04001166 Int4 neg = As<Int4>(CmpLT(src, Float4(-0.0f))) & As<Int4>(Float4(-1.0f));
1167 Int4 pos = As<Int4>(CmpNLE(src, Float4(+0.0f))) & As<Int4>(Float4(1.0f));
John Bauman89401822014-05-06 15:04:28 -04001168 dst = As<Float4>(neg | pos);
1169 }
1170
John Bauman19bac1e2014-05-06 15:23:49 -04001171 void ShaderCore::cmp0(Float4 &dst, Float4 &src0, Float4 &src1, Float4 &src2)
John Bauman89401822014-05-06 15:04:28 -04001172 {
John Bauman19bac1e2014-05-06 15:23:49 -04001173 Int4 pos = CmpLE(Float4(0.0f), src0);
1174 select(dst, pos, src1, src2);
John Bauman89401822014-05-06 15:04:28 -04001175 }
1176
John Bauman19bac1e2014-05-06 15:23:49 -04001177 void ShaderCore::select(Float4 &dst, RValue<Int4> src0, Float4 &src1, Float4 &src2)
1178 {
1179 // FIXME: LLVM vector select
1180 dst = As<Float4>(src0 & As<Int4>(src1) | ~src0 & As<Int4>(src2));
1181 }
1182
1183 void ShaderCore::cmp(Vector4f &dst, Vector4f &src0, Vector4f &src1, Control control)
John Bauman89401822014-05-06 15:04:28 -04001184 {
1185 switch(control)
1186 {
John Bauman19bac1e2014-05-06 15:23:49 -04001187 case Shader::CONTROL_GT:
John Bauman89401822014-05-06 15:04:28 -04001188 dst.x = As<Float4>(CmpNLE(src0.x, src1.x));
1189 dst.y = As<Float4>(CmpNLE(src0.y, src1.y));
1190 dst.z = As<Float4>(CmpNLE(src0.z, src1.z));
1191 dst.w = As<Float4>(CmpNLE(src0.w, src1.w));
1192 break;
John Bauman19bac1e2014-05-06 15:23:49 -04001193 case Shader::CONTROL_EQ:
John Bauman89401822014-05-06 15:04:28 -04001194 dst.x = As<Float4>(CmpEQ(src0.x, src1.x));
1195 dst.y = As<Float4>(CmpEQ(src0.y, src1.y));
1196 dst.z = As<Float4>(CmpEQ(src0.z, src1.z));
1197 dst.w = As<Float4>(CmpEQ(src0.w, src1.w));
1198 break;
John Bauman19bac1e2014-05-06 15:23:49 -04001199 case Shader::CONTROL_GE:
John Bauman89401822014-05-06 15:04:28 -04001200 dst.x = As<Float4>(CmpNLT(src0.x, src1.x));
1201 dst.y = As<Float4>(CmpNLT(src0.y, src1.y));
1202 dst.z = As<Float4>(CmpNLT(src0.z, src1.z));
1203 dst.w = As<Float4>(CmpNLT(src0.w, src1.w));
1204 break;
John Bauman19bac1e2014-05-06 15:23:49 -04001205 case Shader::CONTROL_LT:
John Bauman89401822014-05-06 15:04:28 -04001206 dst.x = As<Float4>(CmpLT(src0.x, src1.x));
1207 dst.y = As<Float4>(CmpLT(src0.y, src1.y));
1208 dst.z = As<Float4>(CmpLT(src0.z, src1.z));
1209 dst.w = As<Float4>(CmpLT(src0.w, src1.w));
1210 break;
John Bauman19bac1e2014-05-06 15:23:49 -04001211 case Shader::CONTROL_NE:
John Bauman89401822014-05-06 15:04:28 -04001212 dst.x = As<Float4>(CmpNEQ(src0.x, src1.x));
1213 dst.y = As<Float4>(CmpNEQ(src0.y, src1.y));
1214 dst.z = As<Float4>(CmpNEQ(src0.z, src1.z));
1215 dst.w = As<Float4>(CmpNEQ(src0.w, src1.w));
1216 break;
John Bauman19bac1e2014-05-06 15:23:49 -04001217 case Shader::CONTROL_LE:
John Bauman89401822014-05-06 15:04:28 -04001218 dst.x = As<Float4>(CmpLE(src0.x, src1.x));
1219 dst.y = As<Float4>(CmpLE(src0.y, src1.y));
1220 dst.z = As<Float4>(CmpLE(src0.z, src1.z));
1221 dst.w = As<Float4>(CmpLE(src0.w, src1.w));
1222 break;
1223 default:
1224 ASSERT(false);
1225 }
1226 }
John Bauman19bac1e2014-05-06 15:23:49 -04001227
1228 void ShaderCore::icmp(Vector4f &dst, Vector4f &src0, Vector4f &src1, Control control)
1229 {
1230 switch(control)
1231 {
1232 case Shader::CONTROL_GT:
1233 dst.x = As<Float4>(CmpNLE(As<Int4>(src0.x), As<Int4>(src1.x)));
1234 dst.y = As<Float4>(CmpNLE(As<Int4>(src0.y), As<Int4>(src1.y)));
1235 dst.z = As<Float4>(CmpNLE(As<Int4>(src0.z), As<Int4>(src1.z)));
1236 dst.w = As<Float4>(CmpNLE(As<Int4>(src0.w), As<Int4>(src1.w)));
1237 break;
1238 case Shader::CONTROL_EQ:
1239 dst.x = As<Float4>(CmpEQ(As<Int4>(src0.x), As<Int4>(src1.x)));
1240 dst.y = As<Float4>(CmpEQ(As<Int4>(src0.y), As<Int4>(src1.y)));
1241 dst.z = As<Float4>(CmpEQ(As<Int4>(src0.z), As<Int4>(src1.z)));
1242 dst.w = As<Float4>(CmpEQ(As<Int4>(src0.w), As<Int4>(src1.w)));
1243 break;
1244 case Shader::CONTROL_GE:
1245 dst.x = As<Float4>(CmpNLT(As<Int4>(src0.x), As<Int4>(src1.x)));
1246 dst.y = As<Float4>(CmpNLT(As<Int4>(src0.y), As<Int4>(src1.y)));
1247 dst.z = As<Float4>(CmpNLT(As<Int4>(src0.z), As<Int4>(src1.z)));
1248 dst.w = As<Float4>(CmpNLT(As<Int4>(src0.w), As<Int4>(src1.w)));
1249 break;
1250 case Shader::CONTROL_LT:
1251 dst.x = As<Float4>(CmpLT(As<Int4>(src0.x), As<Int4>(src1.x)));
1252 dst.y = As<Float4>(CmpLT(As<Int4>(src0.y), As<Int4>(src1.y)));
1253 dst.z = As<Float4>(CmpLT(As<Int4>(src0.z), As<Int4>(src1.z)));
1254 dst.w = As<Float4>(CmpLT(As<Int4>(src0.w), As<Int4>(src1.w)));
1255 break;
1256 case Shader::CONTROL_NE:
1257 dst.x = As<Float4>(CmpNEQ(As<Int4>(src0.x), As<Int4>(src1.x)));
1258 dst.y = As<Float4>(CmpNEQ(As<Int4>(src0.y), As<Int4>(src1.y)));
1259 dst.z = As<Float4>(CmpNEQ(As<Int4>(src0.z), As<Int4>(src1.z)));
1260 dst.w = As<Float4>(CmpNEQ(As<Int4>(src0.w), As<Int4>(src1.w)));
1261 break;
1262 case Shader::CONTROL_LE:
1263 dst.x = As<Float4>(CmpLE(As<Int4>(src0.x), As<Int4>(src1.x)));
1264 dst.y = As<Float4>(CmpLE(As<Int4>(src0.y), As<Int4>(src1.y)));
1265 dst.z = As<Float4>(CmpLE(As<Int4>(src0.z), As<Int4>(src1.z)));
1266 dst.w = As<Float4>(CmpLE(As<Int4>(src0.w), As<Int4>(src1.w)));
1267 break;
1268 default:
1269 ASSERT(false);
1270 }
1271 }
1272
1273 void ShaderCore::all(Float4 &dst, Vector4f &src)
1274 {
1275 dst = As<Float4>(As<Int4>(src.x) & As<Int4>(src.y) & As<Int4>(src.z) & As<Int4>(src.w));
1276 }
1277
1278 void ShaderCore::any(Float4 &dst, Vector4f &src)
1279 {
1280 dst = As<Float4>(As<Int4>(src.x) | As<Int4>(src.y) | As<Int4>(src.z) | As<Int4>(src.w));
1281 }
1282
1283 void ShaderCore::not(Vector4f &dst, Vector4f &src)
1284 {
1285 dst.x = As<Float4>(As<Int4>(src.x) ^ Int4(0xFFFFFFFF));
1286 dst.y = As<Float4>(As<Int4>(src.y) ^ Int4(0xFFFFFFFF));
1287 dst.z = As<Float4>(As<Int4>(src.z) ^ Int4(0xFFFFFFFF));
1288 dst.w = As<Float4>(As<Int4>(src.w) ^ Int4(0xFFFFFFFF));
1289 }
1290
1291 void ShaderCore::or(Float4 &dst, Float4 &src0, Float4 &src1)
1292 {
1293 dst = As<Float4>(As<Int4>(src0) | As<Int4>(src1));
1294 }
1295
1296 void ShaderCore::xor(Float4 &dst, Float4 &src0, Float4 &src1)
1297 {
1298 dst = As<Float4>(As<Int4>(src0) ^ As<Int4>(src1));
1299 }
1300
1301 void ShaderCore::and(Float4 &dst, Float4 &src0, Float4 &src1)
1302 {
1303 dst = As<Float4>(As<Int4>(src0) & As<Int4>(src1));
1304 }
John Bauman89401822014-05-06 15:04:28 -04001305}