blob: 8adeed8619575287db34ebd8a22a3dd9b6712a8d [file] [log] [blame]
// SwiftShader Software Renderer
//
// Copyright(c) 2015 Google Inc.
//
// All rights reserved. No part of this software may be copied, distributed, transmitted,
// transcribed, stored in a retrieval system, translated into any human or computer
// language by any means, or disclosed to third parties without the explicit written
// agreement of Google Inc. Without such an agreement, no rights or licenses, express
// or implied, including but not limited to any patent rights, are granted to you.
//
11
12#include "PixelPipeline.hpp"
13#include "Renderer.hpp"
14#include "SamplerCore.hpp"
15
16namespace sw
17{
18 extern bool postBlendSRGB;
19
Nicolas Capens4f172c72016-01-13 08:34:30 -050020 void PixelPipeline::setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w)
Nicolas Capens8833e012016-01-01 23:47:52 -050021 {
Nicolas Capens4f172c72016-01-13 08:34:30 -050022 if(state.color[0].component & 0x1) diffuse.x = convertFixed12(v[0].x); else diffuse.x = Short4(0x1000);
23 if(state.color[0].component & 0x2) diffuse.y = convertFixed12(v[0].y); else diffuse.y = Short4(0x1000);
24 if(state.color[0].component & 0x4) diffuse.z = convertFixed12(v[0].z); else diffuse.z = Short4(0x1000);
25 if(state.color[0].component & 0x8) diffuse.w = convertFixed12(v[0].w); else diffuse.w = Short4(0x1000);
Nicolas Capens8833e012016-01-01 23:47:52 -050026
Nicolas Capens4f172c72016-01-13 08:34:30 -050027 if(state.color[1].component & 0x1) specular.x = convertFixed12(v[1].x); else specular.x = Short4(0x0000, 0x0000, 0x0000, 0x0000);
28 if(state.color[1].component & 0x2) specular.y = convertFixed12(v[1].y); else specular.y = Short4(0x0000, 0x0000, 0x0000, 0x0000);
29 if(state.color[1].component & 0x4) specular.z = convertFixed12(v[1].z); else specular.z = Short4(0x0000, 0x0000, 0x0000, 0x0000);
30 if(state.color[1].component & 0x8) specular.w = convertFixed12(v[1].w); else specular.w = Short4(0x0000, 0x0000, 0x0000, 0x0000);
Nicolas Capens8833e012016-01-01 23:47:52 -050031 }
32
Nicolas Capens4f172c72016-01-13 08:34:30 -050033 void PixelPipeline::fixedFunction()
Nicolas Capens8833e012016-01-01 23:47:52 -050034 {
Nicolas Capens4f172c72016-01-13 08:34:30 -050035 current = diffuse;
Alexis Hetuf2a8c372015-07-13 11:08:41 -040036 Vector4s temp(0x0000, 0x0000, 0x0000, 0x0000);
37
38 for(int stage = 0; stage < 8; stage++)
39 {
40 if(state.textureStage[stage].stageOperation == TextureStage::STAGE_DISABLE)
41 {
42 break;
43 }
44
45 Vector4s texture;
46
47 if(state.textureStage[stage].usesTexture)
48 {
Nicolas Capens4f172c72016-01-13 08:34:30 -050049 sampleTexture(texture, stage, stage);
Alexis Hetuf2a8c372015-07-13 11:08:41 -040050 }
51
Nicolas Capens4f172c72016-01-13 08:34:30 -050052 blendTexture(temp, texture, stage);
Alexis Hetuf2a8c372015-07-13 11:08:41 -040053 }
54
Nicolas Capens4f172c72016-01-13 08:34:30 -050055 specularPixel(current, specular);
Nicolas Capens8833e012016-01-01 23:47:52 -050056 }
57
Nicolas Capens4f172c72016-01-13 08:34:30 -050058 void PixelPipeline::applyShader(Int cMask[4])
Alexis Hetuf2a8c372015-07-13 11:08:41 -040059 {
Alexis Hetuf2a8c372015-07-13 11:08:41 -040060 if(!shader)
61 {
Nicolas Capens4f172c72016-01-13 08:34:30 -050062 fixedFunction();
Alexis Hetuf2a8c372015-07-13 11:08:41 -040063 return;
64 }
65
66 int pad = 0; // Count number of texm3x3pad instructions
67 Vector4s dPairing; // Destination for first pairing instruction
68
69 for(size_t i = 0; i < shader->getLength(); i++)
70 {
71 const Shader::Instruction *instruction = shader->getInstruction(i);
72 Shader::Opcode opcode = instruction->opcode;
73
74 // #ifndef NDEBUG // FIXME: Centralize debug output control
75 // shader->printInstruction(i, "debug.txt");
76 // #endif
77
78 if(opcode == Shader::OPCODE_DCL || opcode == Shader::OPCODE_DEF || opcode == Shader::OPCODE_DEFI || opcode == Shader::OPCODE_DEFB)
79 {
80 continue;
81 }
82
83 const Dst &dst = instruction->dst;
84 const Src &src0 = instruction->src[0];
85 const Src &src1 = instruction->src[1];
86 const Src &src2 = instruction->src[2];
87
88 unsigned short version = shader->getVersion();
89 bool pairing = i + 1 < shader->getLength() && shader->getInstruction(i + 1)->coissue; // First instruction of pair
90 bool coissue = instruction->coissue; // Second instruction of pair
91
92 Vector4s d;
93 Vector4s s0;
94 Vector4s s1;
95 Vector4s s2;
96
Nicolas Capensc2534f42016-04-04 11:13:24 -040097 if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegister(src0);
98 if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegister(src1);
99 if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegister(src2);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400100
Nicolas Capens4f172c72016-01-13 08:34:30 -0500101 Float4 x = version < 0x0104 ? v[2 + dst.index].x : v[2 + src0.index].x;
102 Float4 y = version < 0x0104 ? v[2 + dst.index].y : v[2 + src0.index].y;
103 Float4 z = version < 0x0104 ? v[2 + dst.index].z : v[2 + src0.index].z;
104 Float4 w = version < 0x0104 ? v[2 + dst.index].w : v[2 + src0.index].w;
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400105
106 switch(opcode)
107 {
108 case Shader::OPCODE_PS_1_0: break;
109 case Shader::OPCODE_PS_1_1: break;
110 case Shader::OPCODE_PS_1_2: break;
111 case Shader::OPCODE_PS_1_3: break;
112 case Shader::OPCODE_PS_1_4: break;
113
114 case Shader::OPCODE_DEF: break;
115
116 case Shader::OPCODE_NOP: break;
117 case Shader::OPCODE_MOV: MOV(d, s0); break;
118 case Shader::OPCODE_ADD: ADD(d, s0, s1); break;
119 case Shader::OPCODE_SUB: SUB(d, s0, s1); break;
120 case Shader::OPCODE_MAD: MAD(d, s0, s1, s2); break;
121 case Shader::OPCODE_MUL: MUL(d, s0, s1); break;
122 case Shader::OPCODE_DP3: DP3(d, s0, s1); break;
123 case Shader::OPCODE_DP4: DP4(d, s0, s1); break;
124 case Shader::OPCODE_LRP: LRP(d, s0, s1, s2); break;
125 case Shader::OPCODE_TEXCOORD:
126 if(version < 0x0104)
127 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500128 TEXCOORD(d, x, y, z, dst.index);
129 }
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400130 else
131 {
132 if((src0.swizzle & 0x30) == 0x20) // .xyz
133 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500134 TEXCRD(d, x, y, z, src0.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400135 }
Nicolas Capens4f172c72016-01-13 08:34:30 -0500136 else // .xwy
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400137 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500138 TEXCRD(d, x, y, w, src0.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400139 }
140 }
141 break;
142 case Shader::OPCODE_TEXKILL:
143 if(version < 0x0104)
144 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500145 TEXKILL(cMask, x, y, z);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400146 }
147 else if(version == 0x0104)
148 {
149 if(dst.type == Shader::PARAMETER_TEXTURE)
150 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500151 TEXKILL(cMask, x, y, z);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400152 }
153 else
154 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500155 TEXKILL(cMask, rs[dst.index]);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400156 }
157 }
158 else ASSERT(false);
159 break;
160 case Shader::OPCODE_TEX:
161 if(version < 0x0104)
162 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500163 TEX(d, x, y, z, dst.index, false);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400164 }
165 else if(version == 0x0104)
166 {
167 if(src0.type == Shader::PARAMETER_TEXTURE)
168 {
169 if((src0.swizzle & 0x30) == 0x20) // .xyz
170 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500171 TEX(d, x, y, z, dst.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400172 }
173 else // .xyw
174 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500175 TEX(d, x, y, w, dst.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400176 }
177 }
178 else
179 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500180 TEXLD(d, s0, dst.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400181 }
182 }
183 else ASSERT(false);
184 break;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500185 case Shader::OPCODE_TEXBEM: TEXBEM(d, s0, x, y, z, dst.index); break;
186 case Shader::OPCODE_TEXBEML: TEXBEML(d, s0, x, y, z, dst.index); break;
187 case Shader::OPCODE_TEXREG2AR: TEXREG2AR(d, s0, dst.index); break;
188 case Shader::OPCODE_TEXREG2GB: TEXREG2GB(d, s0, dst.index); break;
189 case Shader::OPCODE_TEXM3X2PAD: TEXM3X2PAD(x, y, z, s0, 0, src0.modifier == Shader::MODIFIER_SIGN); break;
190 case Shader::OPCODE_TEXM3X2TEX: TEXM3X2TEX(d, x, y, z, dst.index, s0, src0.modifier == Shader::MODIFIER_SIGN); break;
191 case Shader::OPCODE_TEXM3X3PAD: TEXM3X3PAD(x, y, z, s0, pad++ % 2, src0.modifier == Shader::MODIFIER_SIGN); break;
192 case Shader::OPCODE_TEXM3X3TEX: TEXM3X3TEX(d, x, y, z, dst.index, s0, src0.modifier == Shader::MODIFIER_SIGN); break;
193 case Shader::OPCODE_TEXM3X3SPEC: TEXM3X3SPEC(d, x, y, z, dst.index, s0, s1); break;
194 case Shader::OPCODE_TEXM3X3VSPEC: TEXM3X3VSPEC(d, x, y, z, dst.index, s0); break;
195 case Shader::OPCODE_CND: CND(d, s0, s1, s2); break;
196 case Shader::OPCODE_TEXREG2RGB: TEXREG2RGB(d, s0, dst.index); break;
197 case Shader::OPCODE_TEXDP3TEX: TEXDP3TEX(d, x, y, z, dst.index, s0); break;
198 case Shader::OPCODE_TEXM3X2DEPTH: TEXM3X2DEPTH(d, x, y, z, s0, src0.modifier == Shader::MODIFIER_SIGN); break;
199 case Shader::OPCODE_TEXDP3: TEXDP3(d, x, y, z, s0); break;
200 case Shader::OPCODE_TEXM3X3: TEXM3X3(d, x, y, z, s0, src0.modifier == Shader::MODIFIER_SIGN); break;
201 case Shader::OPCODE_TEXDEPTH: TEXDEPTH(); break;
202 case Shader::OPCODE_CMP0: CMP(d, s0, s1, s2); break;
203 case Shader::OPCODE_BEM: BEM(d, s0, s1, dst.index); break;
204 case Shader::OPCODE_PHASE: break;
205 case Shader::OPCODE_END: break;
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400206 default:
207 ASSERT(false);
208 }
209
210 if(dst.type != Shader::PARAMETER_VOID && opcode != Shader::OPCODE_TEXKILL)
211 {
212 if(dst.shift > 0)
213 {
214 if(dst.mask & 0x1) { d.x = AddSat(d.x, d.x); if(dst.shift > 1) d.x = AddSat(d.x, d.x); if(dst.shift > 2) d.x = AddSat(d.x, d.x); }
215 if(dst.mask & 0x2) { d.y = AddSat(d.y, d.y); if(dst.shift > 1) d.y = AddSat(d.y, d.y); if(dst.shift > 2) d.y = AddSat(d.y, d.y); }
216 if(dst.mask & 0x4) { d.z = AddSat(d.z, d.z); if(dst.shift > 1) d.z = AddSat(d.z, d.z); if(dst.shift > 2) d.z = AddSat(d.z, d.z); }
217 if(dst.mask & 0x8) { d.w = AddSat(d.w, d.w); if(dst.shift > 1) d.w = AddSat(d.w, d.w); if(dst.shift > 2) d.w = AddSat(d.w, d.w); }
218 }
219 else if(dst.shift < 0)
220 {
221 if(dst.mask & 0x1) d.x = d.x >> -dst.shift;
222 if(dst.mask & 0x2) d.y = d.y >> -dst.shift;
223 if(dst.mask & 0x4) d.z = d.z >> -dst.shift;
224 if(dst.mask & 0x8) d.w = d.w >> -dst.shift;
225 }
226
227 if(dst.saturate)
228 {
229 if(dst.mask & 0x1) { d.x = Min(d.x, Short4(0x1000)); d.x = Max(d.x, Short4(0x0000, 0x0000, 0x0000, 0x0000)); }
230 if(dst.mask & 0x2) { d.y = Min(d.y, Short4(0x1000)); d.y = Max(d.y, Short4(0x0000, 0x0000, 0x0000, 0x0000)); }
231 if(dst.mask & 0x4) { d.z = Min(d.z, Short4(0x1000)); d.z = Max(d.z, Short4(0x0000, 0x0000, 0x0000, 0x0000)); }
232 if(dst.mask & 0x8) { d.w = Min(d.w, Short4(0x1000)); d.w = Max(d.w, Short4(0x0000, 0x0000, 0x0000, 0x0000)); }
233 }
234
235 if(pairing)
236 {
237 if(dst.mask & 0x1) dPairing.x = d.x;
238 if(dst.mask & 0x2) dPairing.y = d.y;
239 if(dst.mask & 0x4) dPairing.z = d.z;
240 if(dst.mask & 0x8) dPairing.w = d.w;
241 }
242
243 if(coissue)
244 {
245 const Dst &dst = shader->getInstruction(i - 1)->dst;
246
Nicolas Capens4f172c72016-01-13 08:34:30 -0500247 writeDestination(dPairing, dst);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400248 }
249
250 if(!pairing)
251 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500252 writeDestination(d, dst);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400253 }
254 }
255 }
Nicolas Capens8833e012016-01-01 23:47:52 -0500256 }
257
Nicolas Capens4f172c72016-01-13 08:34:30 -0500258 Bool PixelPipeline::alphaTest(Int cMask[4])
Nicolas Capens8833e012016-01-01 23:47:52 -0500259 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500260 current.x = Min(current.x, Short4(0x0FFF, 0x0FFF, 0x0FFF, 0x0FFF)); current.x = Max(current.x, Short4(0x0000, 0x0000, 0x0000, 0x0000));
261 current.y = Min(current.y, Short4(0x0FFF, 0x0FFF, 0x0FFF, 0x0FFF)); current.y = Max(current.y, Short4(0x0000, 0x0000, 0x0000, 0x0000));
262 current.z = Min(current.z, Short4(0x0FFF, 0x0FFF, 0x0FFF, 0x0FFF)); current.z = Max(current.z, Short4(0x0000, 0x0000, 0x0000, 0x0000));
263 current.w = Min(current.w, Short4(0x0FFF, 0x0FFF, 0x0FFF, 0x0FFF)); current.w = Max(current.w, Short4(0x0000, 0x0000, 0x0000, 0x0000));
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400264
265 if(!state.alphaTestActive())
266 {
267 return true;
268 }
269
270 Int aMask;
271
272 if(state.transparencyAntialiasing == TRANSPARENCY_NONE)
273 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500274 PixelRoutine::alphaTest(aMask, current.w);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400275
276 for(unsigned int q = 0; q < state.multiSample; q++)
277 {
278 cMask[q] &= aMask;
279 }
280 }
281 else if(state.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
282 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500283 Float4 alpha = Float4(current.w) * Float4(1.0f / 0x1000);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400284
Nicolas Capens4f172c72016-01-13 08:34:30 -0500285 alphaToCoverage(cMask, alpha);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400286 }
287 else ASSERT(false);
288
289 Int pass = cMask[0];
290
291 for(unsigned int q = 1; q < state.multiSample; q++)
292 {
293 pass = pass | cMask[q];
294 }
295
Nicolas Capens8833e012016-01-01 23:47:52 -0500296 return pass != 0x0;
297 }
298
Nicolas Capens4f172c72016-01-13 08:34:30 -0500299 void PixelPipeline::rasterOperation(Float4 &fog, Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4])
Nicolas Capens8833e012016-01-01 23:47:52 -0500300 {
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400301 if(!state.colorWriteActive(0))
302 {
303 return;
304 }
305
306 Vector4f oC;
307
308 switch(state.targetFormat[0])
309 {
310 case FORMAT_R5G6B5:
311 case FORMAT_X8R8G8B8:
312 case FORMAT_X8B8G8R8:
313 case FORMAT_A8R8G8B8:
314 case FORMAT_A8B8G8R8:
315 case FORMAT_A8:
316 case FORMAT_G16R16:
317 case FORMAT_A16B16G16R16:
318 if(!postBlendSRGB && state.writeSRGB)
319 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500320 linearToSRGB12_16(current);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400321 }
322 else
323 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500324 current.x <<= 4;
325 current.y <<= 4;
326 current.z <<= 4;
327 current.w <<= 4;
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400328 }
329
330 if(state.targetFormat[0] == FORMAT_R5G6B5)
331 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500332 current.x &= Short4(0xF800u);
333 current.y &= Short4(0xFC00u);
334 current.z &= Short4(0xF800u);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400335 }
336
Nicolas Capens4f172c72016-01-13 08:34:30 -0500337 fogBlend(current, fog);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400338
339 for(unsigned int q = 0; q < state.multiSample; q++)
340 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500341 Pointer<Byte> buffer = cBuffer[0] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[0]));
342 Vector4s color = current;
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400343
344 if(state.multiSampleMask & (1 << q))
345 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500346 alphaBlend(0, buffer, color, x);
347 logicOperation(0, buffer, color, x);
348 writeColor(0, buffer, x, color, sMask[q], zMask[q], cMask[q]);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400349 }
350 }
351 break;
352 case FORMAT_R32F:
353 case FORMAT_G32R32F:
Alexis Hetudbd1a8e2016-04-13 11:40:30 -0400354 case FORMAT_X32B32G32R32F:
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400355 case FORMAT_A32B32G32R32F:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500356 convertSigned12(oC, current);
357 PixelRoutine::fogBlend(oC, fog);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400358
359 for(unsigned int q = 0; q < state.multiSample; q++)
360 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500361 Pointer<Byte> buffer = cBuffer[0] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[0]));
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400362 Vector4f color = oC;
363
364 if(state.multiSampleMask & (1 << q))
365 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500366 alphaBlend(0, buffer, color, x);
367 writeColor(0, buffer, x, color, sMask[q], zMask[q], cMask[q]);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400368 }
369 }
370 break;
371 default:
372 ASSERT(false);
Nicolas Capens8833e012016-01-01 23:47:52 -0500373 }
374 }
375
Nicolas Capens4f172c72016-01-13 08:34:30 -0500376 void PixelPipeline::blendTexture(Vector4s &temp, Vector4s &texture, int stage)
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400377 {
378 Vector4s *arg1;
379 Vector4s *arg2;
380 Vector4s *arg3;
381 Vector4s res;
382
383 Vector4s constant;
384 Vector4s tfactor;
385
386 const TextureStage::State &textureStage = state.textureStage[stage];
387
388 if(textureStage.firstArgument == TextureStage::SOURCE_CONSTANT ||
389 textureStage.firstArgumentAlpha == TextureStage::SOURCE_CONSTANT ||
390 textureStage.secondArgument == TextureStage::SOURCE_CONSTANT ||
391 textureStage.secondArgumentAlpha == TextureStage::SOURCE_CONSTANT ||
392 textureStage.thirdArgument == TextureStage::SOURCE_CONSTANT ||
393 textureStage.thirdArgumentAlpha == TextureStage::SOURCE_CONSTANT)
394 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500395 constant.x = *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].constantColor4[0]));
396 constant.y = *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].constantColor4[1]));
397 constant.z = *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].constantColor4[2]));
398 constant.w = *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].constantColor4[3]));
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400399 }
400
401 if(textureStage.firstArgument == TextureStage::SOURCE_TFACTOR ||
402 textureStage.firstArgumentAlpha == TextureStage::SOURCE_TFACTOR ||
403 textureStage.secondArgument == TextureStage::SOURCE_TFACTOR ||
404 textureStage.secondArgumentAlpha == TextureStage::SOURCE_TFACTOR ||
405 textureStage.thirdArgument == TextureStage::SOURCE_TFACTOR ||
406 textureStage.thirdArgumentAlpha == TextureStage::SOURCE_TFACTOR)
407 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500408 tfactor.x = *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[0]));
409 tfactor.y = *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[1]));
410 tfactor.z = *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[2]));
411 tfactor.w = *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[3]));
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400412 }
413
414 // Premodulate
415 if(stage > 0 && textureStage.usesTexture)
416 {
417 if(state.textureStage[stage - 1].stageOperation == TextureStage::STAGE_PREMODULATE)
418 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500419 current.x = MulHigh(current.x, texture.x) << 4;
420 current.y = MulHigh(current.y, texture.y) << 4;
421 current.z = MulHigh(current.z, texture.z) << 4;
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400422 }
423
424 if(state.textureStage[stage - 1].stageOperationAlpha == TextureStage::STAGE_PREMODULATE)
425 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500426 current.w = MulHigh(current.w, texture.w) << 4;
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400427 }
428 }
429
430 if(luminance)
431 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500432 texture.x = MulHigh(texture.x, L) << 4;
433 texture.y = MulHigh(texture.y, L) << 4;
434 texture.z = MulHigh(texture.z, L) << 4;
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400435
436 luminance = false;
437 }
438
439 switch(textureStage.firstArgument)
440 {
441 case TextureStage::SOURCE_TEXTURE: arg1 = &texture; break;
442 case TextureStage::SOURCE_CONSTANT: arg1 = &constant; break;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500443 case TextureStage::SOURCE_CURRENT: arg1 = &current; break;
444 case TextureStage::SOURCE_DIFFUSE: arg1 = &diffuse; break;
445 case TextureStage::SOURCE_SPECULAR: arg1 = &specular; break;
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400446 case TextureStage::SOURCE_TEMP: arg1 = &temp; break;
447 case TextureStage::SOURCE_TFACTOR: arg1 = &tfactor; break;
448 default:
449 ASSERT(false);
450 }
451
452 switch(textureStage.secondArgument)
453 {
454 case TextureStage::SOURCE_TEXTURE: arg2 = &texture; break;
455 case TextureStage::SOURCE_CONSTANT: arg2 = &constant; break;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500456 case TextureStage::SOURCE_CURRENT: arg2 = &current; break;
457 case TextureStage::SOURCE_DIFFUSE: arg2 = &diffuse; break;
458 case TextureStage::SOURCE_SPECULAR: arg2 = &specular; break;
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400459 case TextureStage::SOURCE_TEMP: arg2 = &temp; break;
460 case TextureStage::SOURCE_TFACTOR: arg2 = &tfactor; break;
461 default:
462 ASSERT(false);
463 }
464
465 switch(textureStage.thirdArgument)
466 {
467 case TextureStage::SOURCE_TEXTURE: arg3 = &texture; break;
468 case TextureStage::SOURCE_CONSTANT: arg3 = &constant; break;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500469 case TextureStage::SOURCE_CURRENT: arg3 = &current; break;
470 case TextureStage::SOURCE_DIFFUSE: arg3 = &diffuse; break;
471 case TextureStage::SOURCE_SPECULAR: arg3 = &specular; break;
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400472 case TextureStage::SOURCE_TEMP: arg3 = &temp; break;
473 case TextureStage::SOURCE_TFACTOR: arg3 = &tfactor; break;
474 default:
475 ASSERT(false);
476 }
477
478 Vector4s mod1;
479 Vector4s mod2;
480 Vector4s mod3;
481
482 switch(textureStage.firstModifier)
483 {
484 case TextureStage::MODIFIER_COLOR:
485 break;
486 case TextureStage::MODIFIER_INVCOLOR:
487 mod1.x = SubSat(Short4(0x1000), arg1->x);
488 mod1.y = SubSat(Short4(0x1000), arg1->y);
489 mod1.z = SubSat(Short4(0x1000), arg1->z);
490 mod1.w = SubSat(Short4(0x1000), arg1->w);
491
492 arg1 = &mod1;
493 break;
494 case TextureStage::MODIFIER_ALPHA:
495 mod1.x = arg1->w;
496 mod1.y = arg1->w;
497 mod1.z = arg1->w;
498 mod1.w = arg1->w;
499
500 arg1 = &mod1;
501 break;
502 case TextureStage::MODIFIER_INVALPHA:
503 mod1.x = SubSat(Short4(0x1000), arg1->w);
504 mod1.y = SubSat(Short4(0x1000), arg1->w);
505 mod1.z = SubSat(Short4(0x1000), arg1->w);
506 mod1.w = SubSat(Short4(0x1000), arg1->w);
507
508 arg1 = &mod1;
509 break;
510 default:
511 ASSERT(false);
512 }
513
514 switch(textureStage.secondModifier)
515 {
516 case TextureStage::MODIFIER_COLOR:
517 break;
518 case TextureStage::MODIFIER_INVCOLOR:
519 mod2.x = SubSat(Short4(0x1000), arg2->x);
520 mod2.y = SubSat(Short4(0x1000), arg2->y);
521 mod2.z = SubSat(Short4(0x1000), arg2->z);
522 mod2.w = SubSat(Short4(0x1000), arg2->w);
523
524 arg2 = &mod2;
525 break;
526 case TextureStage::MODIFIER_ALPHA:
527 mod2.x = arg2->w;
528 mod2.y = arg2->w;
529 mod2.z = arg2->w;
530 mod2.w = arg2->w;
531
532 arg2 = &mod2;
533 break;
534 case TextureStage::MODIFIER_INVALPHA:
535 mod2.x = SubSat(Short4(0x1000), arg2->w);
536 mod2.y = SubSat(Short4(0x1000), arg2->w);
537 mod2.z = SubSat(Short4(0x1000), arg2->w);
538 mod2.w = SubSat(Short4(0x1000), arg2->w);
539
540 arg2 = &mod2;
541 break;
542 default:
543 ASSERT(false);
544 }
545
546 switch(textureStage.thirdModifier)
547 {
548 case TextureStage::MODIFIER_COLOR:
549 break;
550 case TextureStage::MODIFIER_INVCOLOR:
551 mod3.x = SubSat(Short4(0x1000), arg3->x);
552 mod3.y = SubSat(Short4(0x1000), arg3->y);
553 mod3.z = SubSat(Short4(0x1000), arg3->z);
554 mod3.w = SubSat(Short4(0x1000), arg3->w);
555
556 arg3 = &mod3;
557 break;
558 case TextureStage::MODIFIER_ALPHA:
559 mod3.x = arg3->w;
560 mod3.y = arg3->w;
561 mod3.z = arg3->w;
562 mod3.w = arg3->w;
563
564 arg3 = &mod3;
565 break;
566 case TextureStage::MODIFIER_INVALPHA:
567 mod3.x = SubSat(Short4(0x1000), arg3->w);
568 mod3.y = SubSat(Short4(0x1000), arg3->w);
569 mod3.z = SubSat(Short4(0x1000), arg3->w);
570 mod3.w = SubSat(Short4(0x1000), arg3->w);
571
572 arg3 = &mod3;
573 break;
574 default:
575 ASSERT(false);
576 }
577
578 switch(textureStage.stageOperation)
579 {
580 case TextureStage::STAGE_DISABLE:
581 break;
582 case TextureStage::STAGE_SELECTARG1: // Arg1
583 res.x = arg1->x;
584 res.y = arg1->y;
585 res.z = arg1->z;
586 break;
587 case TextureStage::STAGE_SELECTARG2: // Arg2
588 res.x = arg2->x;
589 res.y = arg2->y;
590 res.z = arg2->z;
591 break;
592 case TextureStage::STAGE_SELECTARG3: // Arg3
593 res.x = arg3->x;
594 res.y = arg3->y;
595 res.z = arg3->z;
596 break;
597 case TextureStage::STAGE_MODULATE: // Arg1 * Arg2
598 res.x = MulHigh(arg1->x, arg2->x) << 4;
599 res.y = MulHigh(arg1->y, arg2->y) << 4;
600 res.z = MulHigh(arg1->z, arg2->z) << 4;
601 break;
602 case TextureStage::STAGE_MODULATE2X: // Arg1 * Arg2 * 2
603 res.x = MulHigh(arg1->x, arg2->x) << 5;
604 res.y = MulHigh(arg1->y, arg2->y) << 5;
605 res.z = MulHigh(arg1->z, arg2->z) << 5;
606 break;
607 case TextureStage::STAGE_MODULATE4X: // Arg1 * Arg2 * 4
608 res.x = MulHigh(arg1->x, arg2->x) << 6;
609 res.y = MulHigh(arg1->y, arg2->y) << 6;
610 res.z = MulHigh(arg1->z, arg2->z) << 6;
611 break;
612 case TextureStage::STAGE_ADD: // Arg1 + Arg2
613 res.x = AddSat(arg1->x, arg2->x);
614 res.y = AddSat(arg1->y, arg2->y);
615 res.z = AddSat(arg1->z, arg2->z);
616 break;
617 case TextureStage::STAGE_ADDSIGNED: // Arg1 + Arg2 - 0.5
618 res.x = AddSat(arg1->x, arg2->x);
619 res.y = AddSat(arg1->y, arg2->y);
620 res.z = AddSat(arg1->z, arg2->z);
621
622 res.x = SubSat(res.x, Short4(0x0800, 0x0800, 0x0800, 0x0800));
623 res.y = SubSat(res.y, Short4(0x0800, 0x0800, 0x0800, 0x0800));
624 res.z = SubSat(res.z, Short4(0x0800, 0x0800, 0x0800, 0x0800));
625 break;
626 case TextureStage::STAGE_ADDSIGNED2X: // (Arg1 + Arg2 - 0.5) << 1
627 res.x = AddSat(arg1->x, arg2->x);
628 res.y = AddSat(arg1->y, arg2->y);
629 res.z = AddSat(arg1->z, arg2->z);
630
631 res.x = SubSat(res.x, Short4(0x0800, 0x0800, 0x0800, 0x0800));
632 res.y = SubSat(res.y, Short4(0x0800, 0x0800, 0x0800, 0x0800));
633 res.z = SubSat(res.z, Short4(0x0800, 0x0800, 0x0800, 0x0800));
634
635 res.x = AddSat(res.x, res.x);
636 res.y = AddSat(res.y, res.y);
637 res.z = AddSat(res.z, res.z);
638 break;
639 case TextureStage::STAGE_SUBTRACT: // Arg1 - Arg2
640 res.x = SubSat(arg1->x, arg2->x);
641 res.y = SubSat(arg1->y, arg2->y);
642 res.z = SubSat(arg1->z, arg2->z);
643 break;
644 case TextureStage::STAGE_ADDSMOOTH: // Arg1 + Arg2 - Arg1 * Arg2
645 {
646 Short4 tmp;
647
648 tmp = MulHigh(arg1->x, arg2->x) << 4; res.x = AddSat(arg1->x, arg2->x); res.x = SubSat(res.x, tmp);
649 tmp = MulHigh(arg1->y, arg2->y) << 4; res.y = AddSat(arg1->y, arg2->y); res.y = SubSat(res.y, tmp);
650 tmp = MulHigh(arg1->z, arg2->z) << 4; res.z = AddSat(arg1->z, arg2->z); res.z = SubSat(res.z, tmp);
651 }
652 break;
653 case TextureStage::STAGE_MULTIPLYADD: // Arg3 + Arg1 * Arg2
654 res.x = MulHigh(arg1->x, arg2->x) << 4; res.x = AddSat(res.x, arg3->x);
655 res.y = MulHigh(arg1->y, arg2->y) << 4; res.y = AddSat(res.y, arg3->y);
656 res.z = MulHigh(arg1->z, arg2->z) << 4; res.z = AddSat(res.z, arg3->z);
657 break;
658 case TextureStage::STAGE_LERP: // Arg3 * (Arg1 - Arg2) + Arg2
659 res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, arg3->x) << 4; res.x = AddSat(res.x, arg2->x);
660 res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, arg3->y) << 4; res.y = AddSat(res.y, arg2->y);
661 res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, arg3->z) << 4; res.z = AddSat(res.z, arg2->z);
662 break;
663 case TextureStage::STAGE_DOT3: // 2 * (Arg1.x - 0.5) * 2 * (Arg2.x - 0.5) + 2 * (Arg1.y - 0.5) * 2 * (Arg2.y - 0.5) + 2 * (Arg1.z - 0.5) * 2 * (Arg2.z - 0.5)
664 {
665 Short4 tmp;
666
667 res.x = SubSat(arg1->x, Short4(0x0800, 0x0800, 0x0800, 0x0800)); tmp = SubSat(arg2->x, Short4(0x0800, 0x0800, 0x0800, 0x0800)); res.x = MulHigh(res.x, tmp);
668 res.y = SubSat(arg1->y, Short4(0x0800, 0x0800, 0x0800, 0x0800)); tmp = SubSat(arg2->y, Short4(0x0800, 0x0800, 0x0800, 0x0800)); res.y = MulHigh(res.y, tmp);
669 res.z = SubSat(arg1->z, Short4(0x0800, 0x0800, 0x0800, 0x0800)); tmp = SubSat(arg2->z, Short4(0x0800, 0x0800, 0x0800, 0x0800)); res.z = MulHigh(res.z, tmp);
670
671 res.x = res.x << 6;
672 res.y = res.y << 6;
673 res.z = res.z << 6;
674
675 res.x = AddSat(res.x, res.y);
676 res.x = AddSat(res.x, res.z);
677
678 // Clamp to [0, 1]
679 res.x = Max(res.x, Short4(0x0000, 0x0000, 0x0000, 0x0000));
680 res.x = Min(res.x, Short4(0x1000));
681
682 res.y = res.x;
683 res.z = res.x;
684 res.w = res.x;
685 }
686 break;
687 case TextureStage::STAGE_BLENDCURRENTALPHA: // Alpha * (Arg1 - Arg2) + Arg2
Nicolas Capens4f172c72016-01-13 08:34:30 -0500688 res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, current.w) << 4; res.x = AddSat(res.x, arg2->x);
689 res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, current.w) << 4; res.y = AddSat(res.y, arg2->y);
690 res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, current.w) << 4; res.z = AddSat(res.z, arg2->z);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400691 break;
692 case TextureStage::STAGE_BLENDDIFFUSEALPHA: // Alpha * (Arg1 - Arg2) + Arg2
Nicolas Capens4f172c72016-01-13 08:34:30 -0500693 res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, diffuse.w) << 4; res.x = AddSat(res.x, arg2->x);
694 res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, diffuse.w) << 4; res.y = AddSat(res.y, arg2->y);
695 res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, diffuse.w) << 4; res.z = AddSat(res.z, arg2->z);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400696 break;
697 case TextureStage::STAGE_BLENDFACTORALPHA: // Alpha * (Arg1 - Arg2) + Arg2
Nicolas Capens4f172c72016-01-13 08:34:30 -0500698 res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[3]))) << 4; res.x = AddSat(res.x, arg2->x);
699 res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[3]))) << 4; res.y = AddSat(res.y, arg2->y);
700 res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[3]))) << 4; res.z = AddSat(res.z, arg2->z);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400701 break;
702 case TextureStage::STAGE_BLENDTEXTUREALPHA: // Alpha * (Arg1 - Arg2) + Arg2
703 res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, texture.w) << 4; res.x = AddSat(res.x, arg2->x);
704 res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, texture.w) << 4; res.y = AddSat(res.y, arg2->y);
705 res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, texture.w) << 4; res.z = AddSat(res.z, arg2->z);
706 break;
707 case TextureStage::STAGE_BLENDTEXTUREALPHAPM: // Arg1 + Arg2 * (1 - Alpha)
708 res.x = SubSat(Short4(0x1000), texture.w); res.x = MulHigh(res.x, arg2->x) << 4; res.x = AddSat(res.x, arg1->x);
709 res.y = SubSat(Short4(0x1000), texture.w); res.y = MulHigh(res.y, arg2->y) << 4; res.y = AddSat(res.y, arg1->y);
710 res.z = SubSat(Short4(0x1000), texture.w); res.z = MulHigh(res.z, arg2->z) << 4; res.z = AddSat(res.z, arg1->z);
711 break;
712 case TextureStage::STAGE_PREMODULATE:
713 res.x = arg1->x;
714 res.y = arg1->y;
715 res.z = arg1->z;
716 break;
717 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR: // Arg1 + Arg1.w * Arg2
718 res.x = MulHigh(arg1->w, arg2->x) << 4; res.x = AddSat(res.x, arg1->x);
719 res.y = MulHigh(arg1->w, arg2->y) << 4; res.y = AddSat(res.y, arg1->y);
720 res.z = MulHigh(arg1->w, arg2->z) << 4; res.z = AddSat(res.z, arg1->z);
721 break;
722 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA: // Arg1 * Arg2 + Arg1.w
723 res.x = MulHigh(arg1->x, arg2->x) << 4; res.x = AddSat(res.x, arg1->w);
724 res.y = MulHigh(arg1->y, arg2->y) << 4; res.y = AddSat(res.y, arg1->w);
725 res.z = MulHigh(arg1->z, arg2->z) << 4; res.z = AddSat(res.z, arg1->w);
726 break;
727 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR: // (1 - Arg1.w) * Arg2 + Arg1
728 {
729 Short4 tmp;
730
731 res.x = AddSat(arg1->x, arg2->x); tmp = MulHigh(arg1->w, arg2->x) << 4; res.x = SubSat(res.x, tmp);
732 res.y = AddSat(arg1->y, arg2->y); tmp = MulHigh(arg1->w, arg2->y) << 4; res.y = SubSat(res.y, tmp);
733 res.z = AddSat(arg1->z, arg2->z); tmp = MulHigh(arg1->w, arg2->z) << 4; res.z = SubSat(res.z, tmp);
734 }
735 break;
736 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA: // (1 - Arg1) * Arg2 + Arg1.w
737 {
738 Short4 tmp;
739
740 res.x = AddSat(arg1->w, arg2->x); tmp = MulHigh(arg1->x, arg2->x) << 4; res.x = SubSat(res.x, tmp);
741 res.y = AddSat(arg1->w, arg2->y); tmp = MulHigh(arg1->y, arg2->y) << 4; res.y = SubSat(res.y, tmp);
742 res.z = AddSat(arg1->w, arg2->z); tmp = MulHigh(arg1->z, arg2->z) << 4; res.z = SubSat(res.z, tmp);
743 }
744 break;
745 case TextureStage::STAGE_BUMPENVMAP:
746 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500747 du = Float4(texture.x) * Float4(1.0f / 0x0FE0);
748 dv = Float4(texture.y) * Float4(1.0f / 0x0FE0);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400749
750 Float4 du2;
751 Float4 dv2;
752
Nicolas Capens4f172c72016-01-13 08:34:30 -0500753 du2 = du;
754 dv2 = dv;
755 du *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][0]));
756 dv2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][0]));
757 du += dv2;
758 dv *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][1]));
759 du2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][1]));
760 dv += du2;
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400761
762 perturbate = true;
763
Nicolas Capens4f172c72016-01-13 08:34:30 -0500764 res.x = current.x;
765 res.y = current.y;
766 res.z = current.z;
767 res.w = current.w;
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400768 }
769 break;
770 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
771 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500772 du = Float4(texture.x) * Float4(1.0f / 0x0FE0);
773 dv = Float4(texture.y) * Float4(1.0f / 0x0FE0);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400774
775 Float4 du2;
776 Float4 dv2;
777
Nicolas Capens4f172c72016-01-13 08:34:30 -0500778 du2 = du;
779 dv2 = dv;
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400780
Nicolas Capens4f172c72016-01-13 08:34:30 -0500781 du *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][0]));
782 dv2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][0]));
783 du += dv2;
784 dv *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][1]));
785 du2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][1]));
786 dv += du2;
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400787
788 perturbate = true;
789
Nicolas Capens4f172c72016-01-13 08:34:30 -0500790 L = texture.z;
791 L = MulHigh(L, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].luminanceScale4)));
792 L = L << 4;
793 L = AddSat(L, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].luminanceOffset4)));
794 L = Max(L, Short4(0x0000, 0x0000, 0x0000, 0x0000));
795 L = Min(L, Short4(0x1000));
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400796
797 luminance = true;
798
Nicolas Capens4f172c72016-01-13 08:34:30 -0500799 res.x = current.x;
800 res.y = current.y;
801 res.z = current.z;
802 res.w = current.w;
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400803 }
804 break;
805 default:
806 ASSERT(false);
807 }
808
809 if(textureStage.stageOperation != TextureStage::STAGE_DOT3)
810 {
811 switch(textureStage.firstArgumentAlpha)
812 {
813 case TextureStage::SOURCE_TEXTURE: arg1 = &texture; break;
814 case TextureStage::SOURCE_CONSTANT: arg1 = &constant; break;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500815 case TextureStage::SOURCE_CURRENT: arg1 = &current; break;
816 case TextureStage::SOURCE_DIFFUSE: arg1 = &diffuse; break;
817 case TextureStage::SOURCE_SPECULAR: arg1 = &specular; break;
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400818 case TextureStage::SOURCE_TEMP: arg1 = &temp; break;
819 case TextureStage::SOURCE_TFACTOR: arg1 = &tfactor; break;
820 default:
821 ASSERT(false);
822 }
823
824 switch(textureStage.secondArgumentAlpha)
825 {
826 case TextureStage::SOURCE_TEXTURE: arg2 = &texture; break;
827 case TextureStage::SOURCE_CONSTANT: arg2 = &constant; break;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500828 case TextureStage::SOURCE_CURRENT: arg2 = &current; break;
829 case TextureStage::SOURCE_DIFFUSE: arg2 = &diffuse; break;
830 case TextureStage::SOURCE_SPECULAR: arg2 = &specular; break;
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400831 case TextureStage::SOURCE_TEMP: arg2 = &temp; break;
832 case TextureStage::SOURCE_TFACTOR: arg2 = &tfactor; break;
833 default:
834 ASSERT(false);
835 }
836
837 switch(textureStage.thirdArgumentAlpha)
838 {
839 case TextureStage::SOURCE_TEXTURE: arg3 = &texture; break;
840 case TextureStage::SOURCE_CONSTANT: arg3 = &constant; break;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500841 case TextureStage::SOURCE_CURRENT: arg3 = &current; break;
842 case TextureStage::SOURCE_DIFFUSE: arg3 = &diffuse; break;
843 case TextureStage::SOURCE_SPECULAR: arg3 = &specular; break;
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400844 case TextureStage::SOURCE_TEMP: arg3 = &temp; break;
845 case TextureStage::SOURCE_TFACTOR: arg3 = &tfactor; break;
846 default:
847 ASSERT(false);
848 }
849
850 switch(textureStage.firstModifierAlpha) // FIXME: Check if actually used
851 {
852 case TextureStage::MODIFIER_COLOR:
853 break;
854 case TextureStage::MODIFIER_INVCOLOR:
855 mod1.w = SubSat(Short4(0x1000), arg1->w);
856
857 arg1 = &mod1;
858 break;
859 case TextureStage::MODIFIER_ALPHA:
860 // Redudant
861 break;
862 case TextureStage::MODIFIER_INVALPHA:
863 mod1.w = SubSat(Short4(0x1000), arg1->w);
864
865 arg1 = &mod1;
866 break;
867 default:
868 ASSERT(false);
869 }
870
871 switch(textureStage.secondModifierAlpha) // FIXME: Check if actually used
872 {
873 case TextureStage::MODIFIER_COLOR:
874 break;
875 case TextureStage::MODIFIER_INVCOLOR:
876 mod2.w = SubSat(Short4(0x1000), arg2->w);
877
878 arg2 = &mod2;
879 break;
880 case TextureStage::MODIFIER_ALPHA:
881 // Redudant
882 break;
883 case TextureStage::MODIFIER_INVALPHA:
884 mod2.w = SubSat(Short4(0x1000), arg2->w);
885
886 arg2 = &mod2;
887 break;
888 default:
889 ASSERT(false);
890 }
891
892 switch(textureStage.thirdModifierAlpha) // FIXME: Check if actually used
893 {
894 case TextureStage::MODIFIER_COLOR:
895 break;
896 case TextureStage::MODIFIER_INVCOLOR:
897 mod3.w = SubSat(Short4(0x1000), arg3->w);
898
899 arg3 = &mod3;
900 break;
901 case TextureStage::MODIFIER_ALPHA:
902 // Redudant
903 break;
904 case TextureStage::MODIFIER_INVALPHA:
905 mod3.w = SubSat(Short4(0x1000), arg3->w);
906
907 arg3 = &mod3;
908 break;
909 default:
910 ASSERT(false);
911 }
912
913 switch(textureStage.stageOperationAlpha)
914 {
915 case TextureStage::STAGE_DISABLE:
916 break;
917 case TextureStage::STAGE_SELECTARG1: // Arg1
918 res.w = arg1->w;
919 break;
920 case TextureStage::STAGE_SELECTARG2: // Arg2
921 res.w = arg2->w;
922 break;
923 case TextureStage::STAGE_SELECTARG3: // Arg3
924 res.w = arg3->w;
925 break;
926 case TextureStage::STAGE_MODULATE: // Arg1 * Arg2
927 res.w = MulHigh(arg1->w, arg2->w) << 4;
928 break;
929 case TextureStage::STAGE_MODULATE2X: // Arg1 * Arg2 * 2
930 res.w = MulHigh(arg1->w, arg2->w) << 5;
931 break;
932 case TextureStage::STAGE_MODULATE4X: // Arg1 * Arg2 * 4
933 res.w = MulHigh(arg1->w, arg2->w) << 6;
934 break;
935 case TextureStage::STAGE_ADD: // Arg1 + Arg2
936 res.w = AddSat(arg1->w, arg2->w);
937 break;
938 case TextureStage::STAGE_ADDSIGNED: // Arg1 + Arg2 - 0.5
939 res.w = AddSat(arg1->w, arg2->w);
940 res.w = SubSat(res.w, Short4(0x0800, 0x0800, 0x0800, 0x0800));
941 break;
942 case TextureStage::STAGE_ADDSIGNED2X: // (Arg1 + Arg2 - 0.5) << 1
943 res.w = AddSat(arg1->w, arg2->w);
944 res.w = SubSat(res.w, Short4(0x0800, 0x0800, 0x0800, 0x0800));
945 res.w = AddSat(res.w, res.w);
946 break;
947 case TextureStage::STAGE_SUBTRACT: // Arg1 - Arg2
948 res.w = SubSat(arg1->w, arg2->w);
949 break;
950 case TextureStage::STAGE_ADDSMOOTH: // Arg1 + Arg2 - Arg1 * Arg2
951 {
952 Short4 tmp;
953
954 tmp = MulHigh(arg1->w, arg2->w) << 4; res.w = AddSat(arg1->w, arg2->w); res.w = SubSat(res.w, tmp);
955 }
956 break;
957 case TextureStage::STAGE_MULTIPLYADD: // Arg3 + Arg1 * Arg2
958 res.w = MulHigh(arg1->w, arg2->w) << 4; res.w = AddSat(res.w, arg3->w);
959 break;
960 case TextureStage::STAGE_LERP: // Arg3 * (Arg1 - Arg2) + Arg2
961 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, arg3->w) << 4; res.w = AddSat(res.w, arg2->w);
962 break;
963 case TextureStage::STAGE_DOT3:
964 break; // Already computed in color channel
965 case TextureStage::STAGE_BLENDCURRENTALPHA: // Alpha * (Arg1 - Arg2) + Arg2
Nicolas Capens4f172c72016-01-13 08:34:30 -0500966 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, current.w) << 4; res.w = AddSat(res.w, arg2->w);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400967 break;
968 case TextureStage::STAGE_BLENDDIFFUSEALPHA: // Arg1 * (Alpha) + Arg2 * (1 - Alpha)
Nicolas Capens4f172c72016-01-13 08:34:30 -0500969 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, diffuse.w) << 4; res.w = AddSat(res.w, arg2->w);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400970 break;
971 case TextureStage::STAGE_BLENDFACTORALPHA:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500972 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[3]))) << 4; res.w = AddSat(res.w, arg2->w);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400973 break;
974 case TextureStage::STAGE_BLENDTEXTUREALPHA: // Arg1 * (Alpha) + Arg2 * (1 - Alpha)
975 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, texture.w) << 4; res.w = AddSat(res.w, arg2->w);
976 break;
977 case TextureStage::STAGE_BLENDTEXTUREALPHAPM: // Arg1 + Arg2 * (1 - Alpha)
978 res.w = SubSat(Short4(0x1000), texture.w); res.w = MulHigh(res.w, arg2->w) << 4; res.w = AddSat(res.w, arg1->w);
979 break;
980 case TextureStage::STAGE_PREMODULATE:
981 res.w = arg1->w;
982 break;
983 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
984 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
985 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
986 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
987 case TextureStage::STAGE_BUMPENVMAP:
988 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
989 break; // Invalid alpha operations
990 default:
991 ASSERT(false);
992 }
993 }
994
995 // Clamp result to [0, 1]
996
997 switch(textureStage.stageOperation)
998 {
999 case TextureStage::STAGE_DISABLE:
1000 case TextureStage::STAGE_SELECTARG1:
1001 case TextureStage::STAGE_SELECTARG2:
1002 case TextureStage::STAGE_SELECTARG3:
1003 case TextureStage::STAGE_MODULATE:
1004 case TextureStage::STAGE_MODULATE2X:
1005 case TextureStage::STAGE_MODULATE4X:
1006 case TextureStage::STAGE_ADD:
1007 case TextureStage::STAGE_MULTIPLYADD:
1008 case TextureStage::STAGE_LERP:
1009 case TextureStage::STAGE_BLENDCURRENTALPHA:
1010 case TextureStage::STAGE_BLENDDIFFUSEALPHA:
1011 case TextureStage::STAGE_BLENDFACTORALPHA:
1012 case TextureStage::STAGE_BLENDTEXTUREALPHA:
1013 case TextureStage::STAGE_BLENDTEXTUREALPHAPM:
1014 case TextureStage::STAGE_DOT3: // Already clamped
1015 case TextureStage::STAGE_PREMODULATE:
1016 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
1017 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
1018 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
1019 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
1020 case TextureStage::STAGE_BUMPENVMAP:
1021 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
1022 if(state.textureStage[stage].cantUnderflow)
1023 {
1024 break; // Can't go below zero
1025 }
1026 case TextureStage::STAGE_ADDSIGNED:
1027 case TextureStage::STAGE_ADDSIGNED2X:
1028 case TextureStage::STAGE_SUBTRACT:
1029 case TextureStage::STAGE_ADDSMOOTH:
1030 res.x = Max(res.x, Short4(0x0000, 0x0000, 0x0000, 0x0000));
1031 res.y = Max(res.y, Short4(0x0000, 0x0000, 0x0000, 0x0000));
1032 res.z = Max(res.z, Short4(0x0000, 0x0000, 0x0000, 0x0000));
1033 break;
1034 default:
1035 ASSERT(false);
1036 }
1037
1038 switch(textureStage.stageOperationAlpha)
1039 {
1040 case TextureStage::STAGE_DISABLE:
1041 case TextureStage::STAGE_SELECTARG1:
1042 case TextureStage::STAGE_SELECTARG2:
1043 case TextureStage::STAGE_SELECTARG3:
1044 case TextureStage::STAGE_MODULATE:
1045 case TextureStage::STAGE_MODULATE2X:
1046 case TextureStage::STAGE_MODULATE4X:
1047 case TextureStage::STAGE_ADD:
1048 case TextureStage::STAGE_MULTIPLYADD:
1049 case TextureStage::STAGE_LERP:
1050 case TextureStage::STAGE_BLENDCURRENTALPHA:
1051 case TextureStage::STAGE_BLENDDIFFUSEALPHA:
1052 case TextureStage::STAGE_BLENDFACTORALPHA:
1053 case TextureStage::STAGE_BLENDTEXTUREALPHA:
1054 case TextureStage::STAGE_BLENDTEXTUREALPHAPM:
1055 case TextureStage::STAGE_DOT3: // Already clamped
1056 case TextureStage::STAGE_PREMODULATE:
1057 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
1058 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
1059 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
1060 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
1061 case TextureStage::STAGE_BUMPENVMAP:
1062 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
1063 if(state.textureStage[stage].cantUnderflow)
1064 {
1065 break; // Can't go below zero
1066 }
1067 case TextureStage::STAGE_ADDSIGNED:
1068 case TextureStage::STAGE_ADDSIGNED2X:
1069 case TextureStage::STAGE_SUBTRACT:
1070 case TextureStage::STAGE_ADDSMOOTH:
1071 res.w = Max(res.w, Short4(0x0000, 0x0000, 0x0000, 0x0000));
1072 break;
1073 default:
1074 ASSERT(false);
1075 }
1076
1077 switch(textureStage.stageOperation)
1078 {
1079 case TextureStage::STAGE_DISABLE:
1080 case TextureStage::STAGE_SELECTARG1:
1081 case TextureStage::STAGE_SELECTARG2:
1082 case TextureStage::STAGE_SELECTARG3:
1083 case TextureStage::STAGE_MODULATE:
1084 case TextureStage::STAGE_SUBTRACT:
1085 case TextureStage::STAGE_ADDSMOOTH:
1086 case TextureStage::STAGE_LERP:
1087 case TextureStage::STAGE_BLENDCURRENTALPHA:
1088 case TextureStage::STAGE_BLENDDIFFUSEALPHA:
1089 case TextureStage::STAGE_BLENDFACTORALPHA:
1090 case TextureStage::STAGE_BLENDTEXTUREALPHA:
1091 case TextureStage::STAGE_DOT3: // Already clamped
1092 case TextureStage::STAGE_PREMODULATE:
1093 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
1094 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
1095 case TextureStage::STAGE_BUMPENVMAP:
1096 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
1097 break; // Can't go above one
1098 case TextureStage::STAGE_MODULATE2X:
1099 case TextureStage::STAGE_MODULATE4X:
1100 case TextureStage::STAGE_ADD:
1101 case TextureStage::STAGE_ADDSIGNED:
1102 case TextureStage::STAGE_ADDSIGNED2X:
1103 case TextureStage::STAGE_MULTIPLYADD:
1104 case TextureStage::STAGE_BLENDTEXTUREALPHAPM:
1105 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
1106 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
1107 res.x = Min(res.x, Short4(0x1000));
1108 res.y = Min(res.y, Short4(0x1000));
1109 res.z = Min(res.z, Short4(0x1000));
1110 break;
1111 default:
1112 ASSERT(false);
1113 }
1114
1115 switch(textureStage.stageOperationAlpha)
1116 {
1117 case TextureStage::STAGE_DISABLE:
1118 case TextureStage::STAGE_SELECTARG1:
1119 case TextureStage::STAGE_SELECTARG2:
1120 case TextureStage::STAGE_SELECTARG3:
1121 case TextureStage::STAGE_MODULATE:
1122 case TextureStage::STAGE_SUBTRACT:
1123 case TextureStage::STAGE_ADDSMOOTH:
1124 case TextureStage::STAGE_LERP:
1125 case TextureStage::STAGE_BLENDCURRENTALPHA:
1126 case TextureStage::STAGE_BLENDDIFFUSEALPHA:
1127 case TextureStage::STAGE_BLENDFACTORALPHA:
1128 case TextureStage::STAGE_BLENDTEXTUREALPHA:
1129 case TextureStage::STAGE_DOT3: // Already clamped
1130 case TextureStage::STAGE_PREMODULATE:
1131 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
1132 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
1133 case TextureStage::STAGE_BUMPENVMAP:
1134 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
1135 break; // Can't go above one
1136 case TextureStage::STAGE_MODULATE2X:
1137 case TextureStage::STAGE_MODULATE4X:
1138 case TextureStage::STAGE_ADD:
1139 case TextureStage::STAGE_ADDSIGNED:
1140 case TextureStage::STAGE_ADDSIGNED2X:
1141 case TextureStage::STAGE_MULTIPLYADD:
1142 case TextureStage::STAGE_BLENDTEXTUREALPHAPM:
1143 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
1144 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
1145 res.w = Min(res.w, Short4(0x1000));
1146 break;
1147 default:
1148 ASSERT(false);
1149 }
1150
1151 switch(textureStage.destinationArgument)
1152 {
1153 case TextureStage::DESTINATION_CURRENT:
Nicolas Capens4f172c72016-01-13 08:34:30 -05001154 current.x = res.x;
1155 current.y = res.y;
1156 current.z = res.z;
1157 current.w = res.w;
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001158 break;
1159 case TextureStage::DESTINATION_TEMP:
1160 temp.x = res.x;
1161 temp.y = res.y;
1162 temp.z = res.z;
1163 temp.w = res.w;
1164 break;
1165 default:
1166 ASSERT(false);
1167 }
Nicolas Capens8833e012016-01-01 23:47:52 -05001168 }
1169
Nicolas Capens4f172c72016-01-13 08:34:30 -05001170 void PixelPipeline::fogBlend(Vector4s &current, Float4 &f)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001171 {
1172 if(!state.fogActive)
1173 {
1174 return;
1175 }
1176
1177 if(state.pixelFogMode != FOG_NONE)
1178 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001179 pixelFog(f);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001180 }
1181
1182 UShort4 fog = convertFixed16(f, true);
1183
1184 current.x = As<Short4>(MulHigh(As<UShort4>(current.x), fog));
1185 current.y = As<Short4>(MulHigh(As<UShort4>(current.y), fog));
1186 current.z = As<Short4>(MulHigh(As<UShort4>(current.z), fog));
1187
1188 UShort4 invFog = UShort4(0xFFFFu) - fog;
1189
Nicolas Capens4f172c72016-01-13 08:34:30 -05001190 current.x += As<Short4>(MulHigh(invFog, *Pointer<UShort4>(data + OFFSET(DrawData, fog.color4[0]))));
1191 current.y += As<Short4>(MulHigh(invFog, *Pointer<UShort4>(data + OFFSET(DrawData, fog.color4[1]))));
1192 current.z += As<Short4>(MulHigh(invFog, *Pointer<UShort4>(data + OFFSET(DrawData, fog.color4[2]))));
Nicolas Capens8833e012016-01-01 23:47:52 -05001193 }
1194
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001195 void PixelPipeline::specularPixel(Vector4s &current, Vector4s &specular)
1196 {
1197 if(!state.specularAdd)
1198 {
1199 return;
1200 }
1201
1202 current.x = AddSat(current.x, specular.x);
1203 current.y = AddSat(current.y, specular.y);
1204 current.z = AddSat(current.z, specular.z);
Nicolas Capens8833e012016-01-01 23:47:52 -05001205 }
1206
Nicolas Capens4f172c72016-01-13 08:34:30 -05001207 void PixelPipeline::sampleTexture(Vector4s &c, int coordinates, int stage, bool project)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001208 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001209 Float4 x = v[2 + coordinates].x;
1210 Float4 y = v[2 + coordinates].y;
1211 Float4 z = v[2 + coordinates].z;
1212 Float4 w = v[2 + coordinates].w;
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001213
1214 if(perturbate)
1215 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001216 x += du;
1217 y += dv;
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001218
1219 perturbate = false;
1220 }
1221
Nicolas Capens4f172c72016-01-13 08:34:30 -05001222 sampleTexture(c, stage, x, y, z, w, project);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001223 }
1224
Nicolas Capensc2534f42016-04-04 11:13:24 -04001225 void PixelPipeline::sampleTexture(Vector4s &c, int stage, Float4 &u, Float4 &v, Float4 &w, Float4 &q, bool project)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001226 {
Nicolas Capensc2534f42016-04-04 11:13:24 -04001227 #if PERF_PROFILE
1228 Long texTime = Ticks();
1229 #endif
1230
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001231 Vector4f dsx;
1232 Vector4f dsy;
1233
Nicolas Capens4f172c72016-01-13 08:34:30 -05001234 Pointer<Byte> texture = data + OFFSET(DrawData, mipmap) + stage * sizeof(Texture);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001235
1236 if(!project)
1237 {
Nicolas Capensc2534f42016-04-04 11:13:24 -04001238 sampler[stage]->sampleTexture(texture, c, u, v, w, q, dsx, dsy);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001239 }
1240 else
1241 {
1242 Float4 rq = reciprocal(q);
1243
1244 Float4 u_q = u * rq;
1245 Float4 v_q = v * rq;
1246 Float4 w_q = w * rq;
1247
Nicolas Capensc2534f42016-04-04 11:13:24 -04001248 sampler[stage]->sampleTexture(texture, c, u_q, v_q, w_q, q, dsx, dsy);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001249 }
1250
Nicolas Capensc2534f42016-04-04 11:13:24 -04001251 #if PERF_PROFILE
1252 cycles[PERF_TEX] += Ticks() - texTime;
1253 #endif
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001254 }
Nicolas Capens8833e012016-01-01 23:47:52 -05001255
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001256 Short4 PixelPipeline::convertFixed12(RValue<Float4> cf)
1257 {
1258 return RoundShort4(cf * Float4(0x1000));
1259 }
1260
1261 void PixelPipeline::convertFixed12(Vector4s &cs, Vector4f &cf)
1262 {
1263 cs.x = convertFixed12(cf.x);
1264 cs.y = convertFixed12(cf.y);
1265 cs.z = convertFixed12(cf.z);
1266 cs.w = convertFixed12(cf.w);
1267 }
1268
1269 Float4 PixelPipeline::convertSigned12(Short4 &cs)
1270 {
1271 return Float4(cs) * Float4(1.0f / 0x0FFE);
1272 }
1273
1274 void PixelPipeline::convertSigned12(Vector4f &cf, Vector4s &cs)
1275 {
1276 cf.x = convertSigned12(cs.x);
1277 cf.y = convertSigned12(cs.y);
1278 cf.z = convertSigned12(cs.z);
1279 cf.w = convertSigned12(cs.w);
1280 }
Nicolas Capens8833e012016-01-01 23:47:52 -05001281
Nicolas Capens4f172c72016-01-13 08:34:30 -05001282 void PixelPipeline::writeDestination(Vector4s &d, const Dst &dst)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001283 {
1284 switch(dst.type)
1285 {
1286 case Shader::PARAMETER_TEMP:
Nicolas Capens4f172c72016-01-13 08:34:30 -05001287 if(dst.mask & 0x1) rs[dst.index].x = d.x;
1288 if(dst.mask & 0x2) rs[dst.index].y = d.y;
1289 if(dst.mask & 0x4) rs[dst.index].z = d.z;
1290 if(dst.mask & 0x8) rs[dst.index].w = d.w;
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001291 break;
1292 case Shader::PARAMETER_INPUT:
Nicolas Capens4f172c72016-01-13 08:34:30 -05001293 if(dst.mask & 0x1) vs[dst.index].x = d.x;
1294 if(dst.mask & 0x2) vs[dst.index].y = d.y;
1295 if(dst.mask & 0x4) vs[dst.index].z = d.z;
1296 if(dst.mask & 0x8) vs[dst.index].w = d.w;
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001297 break;
1298 case Shader::PARAMETER_CONST: ASSERT(false); break;
1299 case Shader::PARAMETER_TEXTURE:
Nicolas Capens4f172c72016-01-13 08:34:30 -05001300 if(dst.mask & 0x1) ts[dst.index].x = d.x;
1301 if(dst.mask & 0x2) ts[dst.index].y = d.y;
1302 if(dst.mask & 0x4) ts[dst.index].z = d.z;
1303 if(dst.mask & 0x8) ts[dst.index].w = d.w;
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001304 break;
1305 case Shader::PARAMETER_COLOROUT:
Nicolas Capens4f172c72016-01-13 08:34:30 -05001306 if(dst.mask & 0x1) vs[dst.index].x = d.x;
1307 if(dst.mask & 0x2) vs[dst.index].y = d.y;
1308 if(dst.mask & 0x4) vs[dst.index].z = d.z;
1309 if(dst.mask & 0x8) vs[dst.index].w = d.w;
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001310 break;
1311 default:
1312 ASSERT(false);
1313 }
1314 }
Nicolas Capens8833e012016-01-01 23:47:52 -05001315
Nicolas Capensc2534f42016-04-04 11:13:24 -04001316 Vector4s PixelPipeline::fetchRegister(const Src &src)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001317 {
1318 Vector4s *reg;
1319 int i = src.index;
1320
1321 Vector4s c;
1322
1323 if(src.type == Shader::PARAMETER_CONST)
1324 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001325 c.x = *Pointer<Short4>(data + OFFSET(DrawData, ps.cW[i][0]));
1326 c.y = *Pointer<Short4>(data + OFFSET(DrawData, ps.cW[i][1]));
1327 c.z = *Pointer<Short4>(data + OFFSET(DrawData, ps.cW[i][2]));
1328 c.w = *Pointer<Short4>(data + OFFSET(DrawData, ps.cW[i][3]));
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001329 }
1330
1331 switch(src.type)
1332 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001333 case Shader::PARAMETER_TEMP: reg = &rs[i]; break;
1334 case Shader::PARAMETER_INPUT: reg = &vs[i]; break;
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001335 case Shader::PARAMETER_CONST: reg = &c; break;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001336 case Shader::PARAMETER_TEXTURE: reg = &ts[i]; break;
1337 case Shader::PARAMETER_VOID: return rs[0]; // Dummy
1338 case Shader::PARAMETER_FLOAT4LITERAL: return rs[0]; // Dummy
1339 default: ASSERT(false); return rs[0];
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001340 }
1341
1342 const Short4 &x = (*reg)[(src.swizzle >> 0) & 0x3];
1343 const Short4 &y = (*reg)[(src.swizzle >> 2) & 0x3];
1344 const Short4 &z = (*reg)[(src.swizzle >> 4) & 0x3];
1345 const Short4 &w = (*reg)[(src.swizzle >> 6) & 0x3];
1346
1347 Vector4s mod;
1348
1349 switch(src.modifier)
1350 {
1351 case Shader::MODIFIER_NONE:
1352 mod.x = x;
1353 mod.y = y;
1354 mod.z = z;
1355 mod.w = w;
1356 break;
1357 case Shader::MODIFIER_BIAS:
1358 mod.x = SubSat(x, Short4(0x0800, 0x0800, 0x0800, 0x0800));
1359 mod.y = SubSat(y, Short4(0x0800, 0x0800, 0x0800, 0x0800));
1360 mod.z = SubSat(z, Short4(0x0800, 0x0800, 0x0800, 0x0800));
1361 mod.w = SubSat(w, Short4(0x0800, 0x0800, 0x0800, 0x0800));
1362 break;
1363 case Shader::MODIFIER_BIAS_NEGATE:
1364 mod.x = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), x);
1365 mod.y = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), y);
1366 mod.z = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), z);
1367 mod.w = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), w);
1368 break;
1369 case Shader::MODIFIER_COMPLEMENT:
1370 mod.x = SubSat(Short4(0x1000), x);
1371 mod.y = SubSat(Short4(0x1000), y);
1372 mod.z = SubSat(Short4(0x1000), z);
1373 mod.w = SubSat(Short4(0x1000), w);
1374 break;
1375 case Shader::MODIFIER_NEGATE:
1376 mod.x = -x;
1377 mod.y = -y;
1378 mod.z = -z;
1379 mod.w = -w;
1380 break;
1381 case Shader::MODIFIER_X2:
1382 mod.x = AddSat(x, x);
1383 mod.y = AddSat(y, y);
1384 mod.z = AddSat(z, z);
1385 mod.w = AddSat(w, w);
1386 break;
1387 case Shader::MODIFIER_X2_NEGATE:
1388 mod.x = -AddSat(x, x);
1389 mod.y = -AddSat(y, y);
1390 mod.z = -AddSat(z, z);
1391 mod.w = -AddSat(w, w);
1392 break;
1393 case Shader::MODIFIER_SIGN:
1394 mod.x = SubSat(x, Short4(0x0800, 0x0800, 0x0800, 0x0800));
1395 mod.y = SubSat(y, Short4(0x0800, 0x0800, 0x0800, 0x0800));
1396 mod.z = SubSat(z, Short4(0x0800, 0x0800, 0x0800, 0x0800));
1397 mod.w = SubSat(w, Short4(0x0800, 0x0800, 0x0800, 0x0800));
1398 mod.x = AddSat(mod.x, mod.x);
1399 mod.y = AddSat(mod.y, mod.y);
1400 mod.z = AddSat(mod.z, mod.z);
1401 mod.w = AddSat(mod.w, mod.w);
1402 break;
1403 case Shader::MODIFIER_SIGN_NEGATE:
1404 mod.x = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), x);
1405 mod.y = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), y);
1406 mod.z = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), z);
1407 mod.w = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), w);
1408 mod.x = AddSat(mod.x, mod.x);
1409 mod.y = AddSat(mod.y, mod.y);
1410 mod.z = AddSat(mod.z, mod.z);
1411 mod.w = AddSat(mod.w, mod.w);
1412 break;
1413 case Shader::MODIFIER_DZ:
1414 mod.x = x;
1415 mod.y = y;
1416 mod.z = z;
1417 mod.w = w;
1418 // Projection performed by texture sampler
1419 break;
1420 case Shader::MODIFIER_DW:
1421 mod.x = x;
1422 mod.y = y;
1423 mod.z = z;
1424 mod.w = w;
1425 // Projection performed by texture sampler
1426 break;
1427 default:
1428 ASSERT(false);
1429 }
1430
1431 if(src.type == Shader::PARAMETER_CONST && (src.modifier == Shader::MODIFIER_X2 || src.modifier == Shader::MODIFIER_X2_NEGATE))
1432 {
1433 mod.x = Min(mod.x, Short4(0x1000)); mod.x = Max(mod.x, Short4(-0x1000, -0x1000, -0x1000, -0x1000));
1434 mod.y = Min(mod.y, Short4(0x1000)); mod.y = Max(mod.y, Short4(-0x1000, -0x1000, -0x1000, -0x1000));
1435 mod.z = Min(mod.z, Short4(0x1000)); mod.z = Max(mod.z, Short4(-0x1000, -0x1000, -0x1000, -0x1000));
1436 mod.w = Min(mod.w, Short4(0x1000)); mod.w = Max(mod.w, Short4(-0x1000, -0x1000, -0x1000, -0x1000));
1437 }
1438
1439 return mod;
1440 }
Nicolas Capens8833e012016-01-01 23:47:52 -05001441
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001442 void PixelPipeline::MOV(Vector4s &dst, Vector4s &src0)
1443 {
1444 dst.x = src0.x;
1445 dst.y = src0.y;
1446 dst.z = src0.z;
1447 dst.w = src0.w;
1448 }
1449
1450 void PixelPipeline::ADD(Vector4s &dst, Vector4s &src0, Vector4s &src1)
1451 {
1452 dst.x = AddSat(src0.x, src1.x);
1453 dst.y = AddSat(src0.y, src1.y);
1454 dst.z = AddSat(src0.z, src1.z);
1455 dst.w = AddSat(src0.w, src1.w);
1456 }
1457
1458 void PixelPipeline::SUB(Vector4s &dst, Vector4s &src0, Vector4s &src1)
1459 {
1460 dst.x = SubSat(src0.x, src1.x);
1461 dst.y = SubSat(src0.y, src1.y);
1462 dst.z = SubSat(src0.z, src1.z);
1463 dst.w = SubSat(src0.w, src1.w);
1464 }
1465
1466 void PixelPipeline::MAD(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2)
1467 {
1468 // FIXME: Long fixed-point multiply fixup
1469 { dst.x = MulHigh(src0.x, src1.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, src2.x); }
1470 {
1471 dst.y = MulHigh(src0.y, src1.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, src2.y);
1472 }
1473 {dst.z = MulHigh(src0.z, src1.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, src2.z); }
1474 {dst.w = MulHigh(src0.w, src1.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, src2.w); }
1475 }
1476
1477 void PixelPipeline::MUL(Vector4s &dst, Vector4s &src0, Vector4s &src1)
1478 {
1479 // FIXME: Long fixed-point multiply fixup
1480 { dst.x = MulHigh(src0.x, src1.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); }
1481 {
1482 dst.y = MulHigh(src0.y, src1.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y);
1483 }
1484 {dst.z = MulHigh(src0.z, src1.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); }
1485 {dst.w = MulHigh(src0.w, src1.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); }
1486 }
1487
1488 void PixelPipeline::DP3(Vector4s &dst, Vector4s &src0, Vector4s &src1)
1489 {
1490 Short4 t0;
1491 Short4 t1;
1492
1493 // FIXME: Long fixed-point multiply fixup
1494 t0 = MulHigh(src0.x, src1.x); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0);
1495 t1 = MulHigh(src0.y, src1.y); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1);
1496 t0 = AddSat(t0, t1);
1497 t1 = MulHigh(src0.z, src1.z); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1);
1498 t0 = AddSat(t0, t1);
1499
1500 dst.x = t0;
1501 dst.y = t0;
1502 dst.z = t0;
1503 dst.w = t0;
1504 }
1505
1506 void PixelPipeline::DP4(Vector4s &dst, Vector4s &src0, Vector4s &src1)
1507 {
1508 Short4 t0;
1509 Short4 t1;
1510
1511 // FIXME: Long fixed-point multiply fixup
1512 t0 = MulHigh(src0.x, src1.x); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0);
1513 t1 = MulHigh(src0.y, src1.y); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1);
1514 t0 = AddSat(t0, t1);
1515 t1 = MulHigh(src0.z, src1.z); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1);
1516 t0 = AddSat(t0, t1);
1517 t1 = MulHigh(src0.w, src1.w); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1);
1518 t0 = AddSat(t0, t1);
1519
1520 dst.x = t0;
1521 dst.y = t0;
1522 dst.z = t0;
1523 dst.w = t0;
1524 }
1525
1526 void PixelPipeline::LRP(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2)
1527 {
1528 // FIXME: Long fixed-point multiply fixup
1529 { dst.x = SubSat(src1.x, src2.x); dst.x = MulHigh(dst.x, src0.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, src2.x); }
1530 {
1531 dst.y = SubSat(src1.y, src2.y); dst.y = MulHigh(dst.y, src0.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, src2.y);
1532 }
1533 {dst.z = SubSat(src1.z, src2.z); dst.z = MulHigh(dst.z, src0.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, src2.z); }
1534 {dst.w = SubSat(src1.w, src2.w); dst.w = MulHigh(dst.w, src0.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, src2.w); }
1535 }
1536
1537 void PixelPipeline::TEXCOORD(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int coordinate)
1538 {
1539 Float4 uw;
1540 Float4 vw;
1541 Float4 sw;
1542
1543 if(state.interpolant[2 + coordinate].component & 0x01)
1544 {
1545 uw = Max(u, Float4(0.0f));
1546 uw = Min(uw, Float4(1.0f));
1547 dst.x = convertFixed12(uw);
1548 }
1549 else
1550 {
1551 dst.x = Short4(0x0000, 0x0000, 0x0000, 0x0000);
1552 }
1553
1554 if(state.interpolant[2 + coordinate].component & 0x02)
1555 {
1556 vw = Max(v, Float4(0.0f));
1557 vw = Min(vw, Float4(1.0f));
1558 dst.y = convertFixed12(vw);
1559 }
1560 else
1561 {
1562 dst.y = Short4(0x0000, 0x0000, 0x0000, 0x0000);
1563 }
1564
1565 if(state.interpolant[2 + coordinate].component & 0x04)
1566 {
1567 sw = Max(s, Float4(0.0f));
1568 sw = Min(sw, Float4(1.0f));
1569 dst.z = convertFixed12(sw);
1570 }
1571 else
1572 {
1573 dst.z = Short4(0x0000, 0x0000, 0x0000, 0x0000);
1574 }
1575
1576 dst.w = Short4(0x1000);
1577 }
1578
1579 void PixelPipeline::TEXCRD(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int coordinate, bool project)
1580 {
1581 Float4 uw = u;
1582 Float4 vw = v;
1583 Float4 sw = s;
1584
1585 if(project)
1586 {
1587 uw *= Rcp_pp(s);
1588 vw *= Rcp_pp(s);
1589 }
1590
1591 if(state.interpolant[2 + coordinate].component & 0x01)
1592 {
1593 uw *= Float4(0x1000);
1594 uw = Max(uw, Float4(-0x8000));
1595 uw = Min(uw, Float4(0x7FFF));
1596 dst.x = RoundShort4(uw);
1597 }
1598 else
1599 {
1600 dst.x = Short4(0x0000);
1601 }
1602
1603 if(state.interpolant[2 + coordinate].component & 0x02)
1604 {
1605 vw *= Float4(0x1000);
1606 vw = Max(vw, Float4(-0x8000));
1607 vw = Min(vw, Float4(0x7FFF));
1608 dst.y = RoundShort4(vw);
1609 }
1610 else
1611 {
1612 dst.y = Short4(0x0000, 0x0000, 0x0000, 0x0000);
1613 }
1614
1615 if(state.interpolant[2 + coordinate].component & 0x04)
1616 {
1617 sw *= Float4(0x1000);
1618 sw = Max(sw, Float4(-0x8000));
1619 sw = Min(sw, Float4(0x7FFF));
1620 dst.z = RoundShort4(sw);
1621 }
1622 else
1623 {
1624 dst.z = Short4(0x0000, 0x0000, 0x0000, 0x0000);
1625 }
1626 }
1627
Nicolas Capens4f172c72016-01-13 08:34:30 -05001628 void PixelPipeline::TEXDP3(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, Vector4s &src)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001629 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001630 TEXM3X3PAD(u, v, s, src, 0, false);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001631
Nicolas Capens4f172c72016-01-13 08:34:30 -05001632 Short4 t0 = RoundShort4(u_ * Float4(0x1000));
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001633
1634 dst.x = t0;
1635 dst.y = t0;
1636 dst.z = t0;
1637 dst.w = t0;
1638 }
1639
Nicolas Capens4f172c72016-01-13 08:34:30 -05001640 void PixelPipeline::TEXDP3TEX(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001641 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001642 TEXM3X3PAD(u, v, s, src0, 0, false);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001643
Nicolas Capens4f172c72016-01-13 08:34:30 -05001644 v_ = Float4(0.0f);
1645 w_ = Float4(0.0f);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001646
Nicolas Capens4f172c72016-01-13 08:34:30 -05001647 sampleTexture(dst, stage, u_, v_, w_, w_);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001648 }
1649
1650 void PixelPipeline::TEXKILL(Int cMask[4], Float4 &u, Float4 &v, Float4 &s)
1651 {
1652 Int kill = SignMask(CmpNLT(u, Float4(0.0f))) &
1653 SignMask(CmpNLT(v, Float4(0.0f))) &
1654 SignMask(CmpNLT(s, Float4(0.0f)));
1655
1656 for(unsigned int q = 0; q < state.multiSample; q++)
1657 {
1658 cMask[q] &= kill;
1659 }
1660 }
1661
1662 void PixelPipeline::TEXKILL(Int cMask[4], Vector4s &src)
1663 {
1664 Short4 test = src.x | src.y | src.z;
1665 Int kill = SignMask(Pack(test, test)) ^ 0x0000000F;
1666
1667 for(unsigned int q = 0; q < state.multiSample; q++)
1668 {
1669 cMask[q] &= kill;
1670 }
1671 }
1672
Nicolas Capens4f172c72016-01-13 08:34:30 -05001673 void PixelPipeline::TEX(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int sampler, bool project)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001674 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001675 sampleTexture(dst, sampler, u, v, s, s, project);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001676 }
1677
Nicolas Capens4f172c72016-01-13 08:34:30 -05001678 void PixelPipeline::TEXLD(Vector4s &dst, Vector4s &src, int sampler, bool project)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001679 {
1680 Float4 u = Float4(src.x) * Float4(1.0f / 0x0FFE);
1681 Float4 v = Float4(src.y) * Float4(1.0f / 0x0FFE);
1682 Float4 s = Float4(src.z) * Float4(1.0f / 0x0FFE);
1683
Nicolas Capens4f172c72016-01-13 08:34:30 -05001684 sampleTexture(dst, sampler, u, v, s, s, project);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001685 }
1686
Nicolas Capens4f172c72016-01-13 08:34:30 -05001687 void PixelPipeline::TEXBEM(Vector4s &dst, Vector4s &src, Float4 &u, Float4 &v, Float4 &s, int stage)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001688 {
1689 Float4 du = Float4(src.x) * Float4(1.0f / 0x0FFE);
1690 Float4 dv = Float4(src.y) * Float4(1.0f / 0x0FFE);
1691
1692 Float4 du2 = du;
1693 Float4 dv2 = dv;
1694
Nicolas Capens4f172c72016-01-13 08:34:30 -05001695 du *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][0]));
1696 dv2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][0]));
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001697 du += dv2;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001698 dv *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][1]));
1699 du2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][1]));
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001700 dv += du2;
1701
1702 Float4 u_ = u + du;
1703 Float4 v_ = v + dv;
1704
Nicolas Capens4f172c72016-01-13 08:34:30 -05001705 sampleTexture(dst, stage, u_, v_, s, s);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001706 }
1707
Nicolas Capens4f172c72016-01-13 08:34:30 -05001708 void PixelPipeline::TEXBEML(Vector4s &dst, Vector4s &src, Float4 &u, Float4 &v, Float4 &s, int stage)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001709 {
1710 Float4 du = Float4(src.x) * Float4(1.0f / 0x0FFE);
1711 Float4 dv = Float4(src.y) * Float4(1.0f / 0x0FFE);
1712
1713 Float4 du2 = du;
1714 Float4 dv2 = dv;
1715
Nicolas Capens4f172c72016-01-13 08:34:30 -05001716 du *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][0]));
1717 dv2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][0]));
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001718 du += dv2;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001719 dv *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][1]));
1720 du2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][1]));
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001721 dv += du2;
1722
1723 Float4 u_ = u + du;
1724 Float4 v_ = v + dv;
1725
Nicolas Capens4f172c72016-01-13 08:34:30 -05001726 sampleTexture(dst, stage, u_, v_, s, s);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001727
1728 Short4 L;
1729
1730 L = src.z;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001731 L = MulHigh(L, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].luminanceScale4)));
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001732 L = L << 4;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001733 L = AddSat(L, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].luminanceOffset4)));
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001734 L = Max(L, Short4(0x0000, 0x0000, 0x0000, 0x0000));
1735 L = Min(L, Short4(0x1000));
1736
1737 dst.x = MulHigh(dst.x, L); dst.x = dst.x << 4;
1738 dst.y = MulHigh(dst.y, L); dst.y = dst.y << 4;
1739 dst.z = MulHigh(dst.z, L); dst.z = dst.z << 4;
1740 }
1741
Nicolas Capens4f172c72016-01-13 08:34:30 -05001742 void PixelPipeline::TEXREG2AR(Vector4s &dst, Vector4s &src0, int stage)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001743 {
1744 Float4 u = Float4(src0.w) * Float4(1.0f / 0x0FFE);
1745 Float4 v = Float4(src0.x) * Float4(1.0f / 0x0FFE);
1746 Float4 s = Float4(src0.z) * Float4(1.0f / 0x0FFE);
1747
Nicolas Capens4f172c72016-01-13 08:34:30 -05001748 sampleTexture(dst, stage, u, v, s, s);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001749 }
1750
Nicolas Capens4f172c72016-01-13 08:34:30 -05001751 void PixelPipeline::TEXREG2GB(Vector4s &dst, Vector4s &src0, int stage)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001752 {
1753 Float4 u = Float4(src0.y) * Float4(1.0f / 0x0FFE);
1754 Float4 v = Float4(src0.z) * Float4(1.0f / 0x0FFE);
1755 Float4 s = v;
1756
Nicolas Capens4f172c72016-01-13 08:34:30 -05001757 sampleTexture(dst, stage, u, v, s, s);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001758 }
1759
Nicolas Capens4f172c72016-01-13 08:34:30 -05001760 void PixelPipeline::TEXREG2RGB(Vector4s &dst, Vector4s &src0, int stage)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001761 {
1762 Float4 u = Float4(src0.x) * Float4(1.0f / 0x0FFE);
1763 Float4 v = Float4(src0.y) * Float4(1.0f / 0x0FFE);
1764 Float4 s = Float4(src0.z) * Float4(1.0f / 0x0FFE);
1765
Nicolas Capens4f172c72016-01-13 08:34:30 -05001766 sampleTexture(dst, stage, u, v, s, s);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001767 }
1768
Nicolas Capens4f172c72016-01-13 08:34:30 -05001769 void PixelPipeline::TEXM3X2DEPTH(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, Vector4s &src, bool signedScaling)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001770 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001771 TEXM3X2PAD(u, v, s, src, 1, signedScaling);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001772
1773 // z / w
Nicolas Capens4f172c72016-01-13 08:34:30 -05001774 u_ *= Rcp_pp(v_); // FIXME: Set result to 1.0 when division by zero
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001775
Nicolas Capens4f172c72016-01-13 08:34:30 -05001776 oDepth = u_;
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001777 }
1778
Nicolas Capens4f172c72016-01-13 08:34:30 -05001779 void PixelPipeline::TEXM3X2PAD(Float4 &u, Float4 &v, Float4 &s, Vector4s &src0, int component, bool signedScaling)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001780 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001781 TEXM3X3PAD(u, v, s, src0, component, signedScaling);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001782 }
1783
Nicolas Capens4f172c72016-01-13 08:34:30 -05001784 void PixelPipeline::TEXM3X2TEX(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0, bool signedScaling)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001785 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001786 TEXM3X2PAD(u, v, s, src0, 1, signedScaling);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001787
Nicolas Capens4f172c72016-01-13 08:34:30 -05001788 w_ = Float4(0.0f);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001789
Nicolas Capens4f172c72016-01-13 08:34:30 -05001790 sampleTexture(dst, stage, u_, v_, w_, w_);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001791 }
1792
Nicolas Capens4f172c72016-01-13 08:34:30 -05001793 void PixelPipeline::TEXM3X3(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, Vector4s &src0, bool signedScaling)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001794 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001795 TEXM3X3PAD(u, v, s, src0, 2, signedScaling);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001796
Nicolas Capens4f172c72016-01-13 08:34:30 -05001797 dst.x = RoundShort4(u_ * Float4(0x1000));
1798 dst.y = RoundShort4(v_ * Float4(0x1000));
1799 dst.z = RoundShort4(w_ * Float4(0x1000));
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001800 dst.w = Short4(0x1000);
1801 }
1802
Nicolas Capens4f172c72016-01-13 08:34:30 -05001803 void PixelPipeline::TEXM3X3PAD(Float4 &u, Float4 &v, Float4 &s, Vector4s &src0, int component, bool signedScaling)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001804 {
1805 if(component == 0 || previousScaling != signedScaling) // FIXME: Other source modifiers?
1806 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001807 U = Float4(src0.x);
1808 V = Float4(src0.y);
1809 W = Float4(src0.z);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001810
1811 previousScaling = signedScaling;
1812 }
1813
Nicolas Capens4f172c72016-01-13 08:34:30 -05001814 Float4 x = U * u + V * v + W * s;
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001815
1816 x *= Float4(1.0f / 0x1000);
1817
1818 switch(component)
1819 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001820 case 0: u_ = x; break;
1821 case 1: v_ = x; break;
1822 case 2: w_ = x; break;
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001823 default: ASSERT(false);
1824 }
1825 }
1826
Nicolas Capens4f172c72016-01-13 08:34:30 -05001827 void PixelPipeline::TEXM3X3SPEC(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0, Vector4s &src1)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001828 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001829 TEXM3X3PAD(u, v, s, src0, 2, false);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001830
1831 Float4 E[3]; // Eye vector
1832
1833 E[0] = Float4(src1.x) * Float4(1.0f / 0x0FFE);
1834 E[1] = Float4(src1.y) * Float4(1.0f / 0x0FFE);
1835 E[2] = Float4(src1.z) * Float4(1.0f / 0x0FFE);
1836
1837 // Reflection
1838 Float4 u__;
1839 Float4 v__;
1840 Float4 w__;
1841
1842 // (u'', v'', w'') = 2 * (N . E) * N - E * (N . N)
Nicolas Capens4f172c72016-01-13 08:34:30 -05001843 u__ = u_ * E[0];
1844 v__ = v_ * E[1];
1845 w__ = w_ * E[2];
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001846 u__ += v__ + w__;
1847 u__ += u__;
1848 v__ = u__;
1849 w__ = u__;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001850 u__ *= u_;
1851 v__ *= v_;
1852 w__ *= w_;
1853 u_ *= u_;
1854 v_ *= v_;
1855 w_ *= w_;
1856 u_ += v_ + w_;
1857 u__ -= E[0] * u_;
1858 v__ -= E[1] * u_;
1859 w__ -= E[2] * u_;
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001860
Nicolas Capens4f172c72016-01-13 08:34:30 -05001861 sampleTexture(dst, stage, u__, v__, w__, w__);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001862 }
1863
Nicolas Capens4f172c72016-01-13 08:34:30 -05001864 void PixelPipeline::TEXM3X3TEX(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0, bool signedScaling)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001865 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001866 TEXM3X3PAD(u, v, s, src0, 2, signedScaling);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001867
Nicolas Capens4f172c72016-01-13 08:34:30 -05001868 sampleTexture(dst, stage, u_, v_, w_, w_);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001869 }
1870
Nicolas Capens4f172c72016-01-13 08:34:30 -05001871 void PixelPipeline::TEXM3X3VSPEC(Vector4s &dst, Float4 &x, Float4 &y, Float4 &z, int stage, Vector4s &src0)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001872 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001873 TEXM3X3PAD(x, y, z, src0, 2, false);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001874
1875 Float4 E[3]; // Eye vector
1876
Nicolas Capens4f172c72016-01-13 08:34:30 -05001877 E[0] = v[2 + stage - 2].w;
1878 E[1] = v[2 + stage - 1].w;
1879 E[2] = v[2 + stage - 0].w;
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001880
1881 // Reflection
1882 Float4 u__;
1883 Float4 v__;
1884 Float4 w__;
1885
1886 // (u'', v'', w'') = 2 * (N . E) * N - E * (N . N)
Nicolas Capens4f172c72016-01-13 08:34:30 -05001887 u__ = u_ * E[0];
1888 v__ = v_ * E[1];
1889 w__ = w_ * E[2];
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001890 u__ += v__ + w__;
1891 u__ += u__;
1892 v__ = u__;
1893 w__ = u__;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001894 u__ *= u_;
1895 v__ *= v_;
1896 w__ *= w_;
1897 u_ *= u_;
1898 v_ *= v_;
1899 w_ *= w_;
1900 u_ += v_ + w_;
1901 u__ -= E[0] * u_;
1902 v__ -= E[1] * u_;
1903 w__ -= E[2] * u_;
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001904
Nicolas Capens4f172c72016-01-13 08:34:30 -05001905 sampleTexture(dst, stage, u__, v__, w__, w__);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001906 }
1907
Nicolas Capens4f172c72016-01-13 08:34:30 -05001908 void PixelPipeline::TEXDEPTH()
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001909 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001910 u_ = Float4(rs[5].x);
1911 v_ = Float4(rs[5].y);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001912
1913 // z / w
Nicolas Capens4f172c72016-01-13 08:34:30 -05001914 u_ *= Rcp_pp(v_); // FIXME: Set result to 1.0 when division by zero
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001915
Nicolas Capens4f172c72016-01-13 08:34:30 -05001916 oDepth = u_;
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001917 }
1918
1919 void PixelPipeline::CND(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2)
1920 {
1921 { Short4 t0; t0 = src0.x; t0 = CmpGT(t0, Short4(0x0800, 0x0800, 0x0800, 0x0800)); Short4 t1; t1 = src1.x; t1 = t1 & t0; t0 = ~t0 & src2.x; t0 = t0 | t1; dst.x = t0; };
1922 {Short4 t0; t0 = src0.y; t0 = CmpGT(t0, Short4(0x0800, 0x0800, 0x0800, 0x0800)); Short4 t1; t1 = src1.y; t1 = t1 & t0; t0 = ~t0 & src2.y; t0 = t0 | t1; dst.y = t0; };
1923 {Short4 t0; t0 = src0.z; t0 = CmpGT(t0, Short4(0x0800, 0x0800, 0x0800, 0x0800)); Short4 t1; t1 = src1.z; t1 = t1 & t0; t0 = ~t0 & src2.z; t0 = t0 | t1; dst.z = t0; };
1924 {Short4 t0; t0 = src0.w; t0 = CmpGT(t0, Short4(0x0800, 0x0800, 0x0800, 0x0800)); Short4 t1; t1 = src1.w; t1 = t1 & t0; t0 = ~t0 & src2.w; t0 = t0 | t1; dst.w = t0; };
1925 }
1926
1927 void PixelPipeline::CMP(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2)
1928 {
1929 { Short4 t0 = CmpGT(Short4(0x0000, 0x0000, 0x0000, 0x0000), src0.x); Short4 t1; t1 = src2.x; t1 &= t0; t0 = ~t0 & src1.x; t0 |= t1; dst.x = t0; };
1930 {Short4 t0 = CmpGT(Short4(0x0000, 0x0000, 0x0000, 0x0000), src0.y); Short4 t1; t1 = src2.y; t1 &= t0; t0 = ~t0 & src1.y; t0 |= t1; dst.y = t0; };
1931 {Short4 t0 = CmpGT(Short4(0x0000, 0x0000, 0x0000, 0x0000), src0.z); Short4 t1; t1 = src2.z; t1 &= t0; t0 = ~t0 & src1.z; t0 |= t1; dst.z = t0; };
1932 {Short4 t0 = CmpGT(Short4(0x0000, 0x0000, 0x0000, 0x0000), src0.w); Short4 t1; t1 = src2.w; t1 &= t0; t0 = ~t0 & src1.w; t0 |= t1; dst.w = t0; };
1933 }
1934
Nicolas Capens4f172c72016-01-13 08:34:30 -05001935 void PixelPipeline::BEM(Vector4s &dst, Vector4s &src0, Vector4s &src1, int stage)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001936 {
1937 Short4 t0;
1938 Short4 t1;
1939
1940 // dst.x = src0.x + BUMPENVMAT00(stage) * src1.x + BUMPENVMAT10(stage) * src1.y
Nicolas Capens4f172c72016-01-13 08:34:30 -05001941 t0 = MulHigh(src1.x, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4W[0][0]))); t0 = t0 << 4; // FIXME: Matrix components range? Overflow hazard.
1942 t1 = MulHigh(src1.y, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4W[1][0]))); t1 = t1 << 4; // FIXME: Matrix components range? Overflow hazard.
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001943 t0 = AddSat(t0, t1);
1944 t0 = AddSat(t0, src0.x);
1945 dst.x = t0;
1946
1947 // dst.y = src0.y + BUMPENVMAT01(stage) * src1.x + BUMPENVMAT11(stage) * src1.y
Nicolas Capens4f172c72016-01-13 08:34:30 -05001948 t0 = MulHigh(src1.x, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4W[0][1]))); t0 = t0 << 4; // FIXME: Matrix components range? Overflow hazard.
1949 t1 = MulHigh(src1.y, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4W[1][1]))); t1 = t1 << 4; // FIXME: Matrix components range? Overflow hazard.
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001950 t0 = AddSat(t0, t1);
1951 t0 = AddSat(t0, src0.y);
1952 dst.y = t0;
1953 }
Nicolas Capens8833e012016-01-01 23:47:52 -05001954}
1955