blob: 99f91e7e60108d89f3f9983b8f15090e5e96858f [file] [log] [blame]
// SwiftShader Software Renderer
//
// Copyright(c) 2015 Google Inc.
//
// All rights reserved. No part of this software may be copied, distributed, transmitted,
// transcribed, stored in a retrieval system, translated into any human or computer
// language by any means, or disclosed to third parties without the explicit written
// agreement of Google Inc. Without such an agreement, no rights or licenses, express
// or implied, including but not limited to any patent rights, are granted to you.
//
11
12#include "PixelPipeline.hpp"
13#include "Renderer.hpp"
14#include "SamplerCore.hpp"
15
16namespace sw
17{
18 extern bool postBlendSRGB;
19
Nicolas Capens4f172c72016-01-13 08:34:30 -050020 void PixelPipeline::setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w)
Nicolas Capens8833e012016-01-01 23:47:52 -050021 {
Nicolas Capens4f172c72016-01-13 08:34:30 -050022 if(state.color[0].component & 0x1) diffuse.x = convertFixed12(v[0].x); else diffuse.x = Short4(0x1000);
23 if(state.color[0].component & 0x2) diffuse.y = convertFixed12(v[0].y); else diffuse.y = Short4(0x1000);
24 if(state.color[0].component & 0x4) diffuse.z = convertFixed12(v[0].z); else diffuse.z = Short4(0x1000);
25 if(state.color[0].component & 0x8) diffuse.w = convertFixed12(v[0].w); else diffuse.w = Short4(0x1000);
Nicolas Capens8833e012016-01-01 23:47:52 -050026
Nicolas Capens4f172c72016-01-13 08:34:30 -050027 if(state.color[1].component & 0x1) specular.x = convertFixed12(v[1].x); else specular.x = Short4(0x0000, 0x0000, 0x0000, 0x0000);
28 if(state.color[1].component & 0x2) specular.y = convertFixed12(v[1].y); else specular.y = Short4(0x0000, 0x0000, 0x0000, 0x0000);
29 if(state.color[1].component & 0x4) specular.z = convertFixed12(v[1].z); else specular.z = Short4(0x0000, 0x0000, 0x0000, 0x0000);
30 if(state.color[1].component & 0x8) specular.w = convertFixed12(v[1].w); else specular.w = Short4(0x0000, 0x0000, 0x0000, 0x0000);
Nicolas Capens8833e012016-01-01 23:47:52 -050031 }
32
Nicolas Capens4f172c72016-01-13 08:34:30 -050033 void PixelPipeline::fixedFunction()
Nicolas Capens8833e012016-01-01 23:47:52 -050034 {
Nicolas Capens4f172c72016-01-13 08:34:30 -050035 current = diffuse;
Alexis Hetuf2a8c372015-07-13 11:08:41 -040036 Vector4s temp(0x0000, 0x0000, 0x0000, 0x0000);
37
38 for(int stage = 0; stage < 8; stage++)
39 {
40 if(state.textureStage[stage].stageOperation == TextureStage::STAGE_DISABLE)
41 {
42 break;
43 }
44
45 Vector4s texture;
46
47 if(state.textureStage[stage].usesTexture)
48 {
Nicolas Capens4f172c72016-01-13 08:34:30 -050049 sampleTexture(texture, stage, stage);
Alexis Hetuf2a8c372015-07-13 11:08:41 -040050 }
51
Nicolas Capens4f172c72016-01-13 08:34:30 -050052 blendTexture(temp, texture, stage);
Alexis Hetuf2a8c372015-07-13 11:08:41 -040053 }
54
Nicolas Capens4f172c72016-01-13 08:34:30 -050055 specularPixel(current, specular);
Nicolas Capens8833e012016-01-01 23:47:52 -050056 }
57
Nicolas Capens4f172c72016-01-13 08:34:30 -050058 void PixelPipeline::applyShader(Int cMask[4])
Alexis Hetuf2a8c372015-07-13 11:08:41 -040059 {
Alexis Hetuf2a8c372015-07-13 11:08:41 -040060 if(!shader)
61 {
Nicolas Capens4f172c72016-01-13 08:34:30 -050062 fixedFunction();
Alexis Hetuf2a8c372015-07-13 11:08:41 -040063 return;
64 }
65
66 int pad = 0; // Count number of texm3x3pad instructions
67 Vector4s dPairing; // Destination for first pairing instruction
68
69 for(size_t i = 0; i < shader->getLength(); i++)
70 {
71 const Shader::Instruction *instruction = shader->getInstruction(i);
72 Shader::Opcode opcode = instruction->opcode;
73
74 // #ifndef NDEBUG // FIXME: Centralize debug output control
75 // shader->printInstruction(i, "debug.txt");
76 // #endif
77
78 if(opcode == Shader::OPCODE_DCL || opcode == Shader::OPCODE_DEF || opcode == Shader::OPCODE_DEFI || opcode == Shader::OPCODE_DEFB)
79 {
80 continue;
81 }
82
83 const Dst &dst = instruction->dst;
84 const Src &src0 = instruction->src[0];
85 const Src &src1 = instruction->src[1];
86 const Src &src2 = instruction->src[2];
87
88 unsigned short version = shader->getVersion();
89 bool pairing = i + 1 < shader->getLength() && shader->getInstruction(i + 1)->coissue; // First instruction of pair
90 bool coissue = instruction->coissue; // Second instruction of pair
91
92 Vector4s d;
93 Vector4s s0;
94 Vector4s s1;
95 Vector4s s2;
96
Nicolas Capensc2534f42016-04-04 11:13:24 -040097 if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegister(src0);
98 if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegister(src1);
99 if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegister(src2);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400100
Nicolas Capens4f172c72016-01-13 08:34:30 -0500101 Float4 x = version < 0x0104 ? v[2 + dst.index].x : v[2 + src0.index].x;
102 Float4 y = version < 0x0104 ? v[2 + dst.index].y : v[2 + src0.index].y;
103 Float4 z = version < 0x0104 ? v[2 + dst.index].z : v[2 + src0.index].z;
104 Float4 w = version < 0x0104 ? v[2 + dst.index].w : v[2 + src0.index].w;
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400105
106 switch(opcode)
107 {
108 case Shader::OPCODE_PS_1_0: break;
109 case Shader::OPCODE_PS_1_1: break;
110 case Shader::OPCODE_PS_1_2: break;
111 case Shader::OPCODE_PS_1_3: break;
112 case Shader::OPCODE_PS_1_4: break;
113
114 case Shader::OPCODE_DEF: break;
115
116 case Shader::OPCODE_NOP: break;
117 case Shader::OPCODE_MOV: MOV(d, s0); break;
118 case Shader::OPCODE_ADD: ADD(d, s0, s1); break;
119 case Shader::OPCODE_SUB: SUB(d, s0, s1); break;
120 case Shader::OPCODE_MAD: MAD(d, s0, s1, s2); break;
121 case Shader::OPCODE_MUL: MUL(d, s0, s1); break;
122 case Shader::OPCODE_DP3: DP3(d, s0, s1); break;
123 case Shader::OPCODE_DP4: DP4(d, s0, s1); break;
124 case Shader::OPCODE_LRP: LRP(d, s0, s1, s2); break;
125 case Shader::OPCODE_TEXCOORD:
126 if(version < 0x0104)
127 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500128 TEXCOORD(d, x, y, z, dst.index);
129 }
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400130 else
131 {
132 if((src0.swizzle & 0x30) == 0x20) // .xyz
133 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500134 TEXCRD(d, x, y, z, src0.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400135 }
Nicolas Capens4f172c72016-01-13 08:34:30 -0500136 else // .xwy
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400137 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500138 TEXCRD(d, x, y, w, src0.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400139 }
140 }
141 break;
142 case Shader::OPCODE_TEXKILL:
143 if(version < 0x0104)
144 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500145 TEXKILL(cMask, x, y, z);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400146 }
147 else if(version == 0x0104)
148 {
149 if(dst.type == Shader::PARAMETER_TEXTURE)
150 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500151 TEXKILL(cMask, x, y, z);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400152 }
153 else
154 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500155 TEXKILL(cMask, rs[dst.index]);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400156 }
157 }
158 else ASSERT(false);
159 break;
160 case Shader::OPCODE_TEX:
161 if(version < 0x0104)
162 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500163 TEX(d, x, y, z, dst.index, false);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400164 }
165 else if(version == 0x0104)
166 {
167 if(src0.type == Shader::PARAMETER_TEXTURE)
168 {
169 if((src0.swizzle & 0x30) == 0x20) // .xyz
170 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500171 TEX(d, x, y, z, dst.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400172 }
173 else // .xyw
174 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500175 TEX(d, x, y, w, dst.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400176 }
177 }
178 else
179 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500180 TEXLD(d, s0, dst.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400181 }
182 }
183 else ASSERT(false);
184 break;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500185 case Shader::OPCODE_TEXBEM: TEXBEM(d, s0, x, y, z, dst.index); break;
186 case Shader::OPCODE_TEXBEML: TEXBEML(d, s0, x, y, z, dst.index); break;
187 case Shader::OPCODE_TEXREG2AR: TEXREG2AR(d, s0, dst.index); break;
188 case Shader::OPCODE_TEXREG2GB: TEXREG2GB(d, s0, dst.index); break;
189 case Shader::OPCODE_TEXM3X2PAD: TEXM3X2PAD(x, y, z, s0, 0, src0.modifier == Shader::MODIFIER_SIGN); break;
190 case Shader::OPCODE_TEXM3X2TEX: TEXM3X2TEX(d, x, y, z, dst.index, s0, src0.modifier == Shader::MODIFIER_SIGN); break;
191 case Shader::OPCODE_TEXM3X3PAD: TEXM3X3PAD(x, y, z, s0, pad++ % 2, src0.modifier == Shader::MODIFIER_SIGN); break;
192 case Shader::OPCODE_TEXM3X3TEX: TEXM3X3TEX(d, x, y, z, dst.index, s0, src0.modifier == Shader::MODIFIER_SIGN); break;
193 case Shader::OPCODE_TEXM3X3SPEC: TEXM3X3SPEC(d, x, y, z, dst.index, s0, s1); break;
194 case Shader::OPCODE_TEXM3X3VSPEC: TEXM3X3VSPEC(d, x, y, z, dst.index, s0); break;
195 case Shader::OPCODE_CND: CND(d, s0, s1, s2); break;
196 case Shader::OPCODE_TEXREG2RGB: TEXREG2RGB(d, s0, dst.index); break;
197 case Shader::OPCODE_TEXDP3TEX: TEXDP3TEX(d, x, y, z, dst.index, s0); break;
198 case Shader::OPCODE_TEXM3X2DEPTH: TEXM3X2DEPTH(d, x, y, z, s0, src0.modifier == Shader::MODIFIER_SIGN); break;
199 case Shader::OPCODE_TEXDP3: TEXDP3(d, x, y, z, s0); break;
200 case Shader::OPCODE_TEXM3X3: TEXM3X3(d, x, y, z, s0, src0.modifier == Shader::MODIFIER_SIGN); break;
201 case Shader::OPCODE_TEXDEPTH: TEXDEPTH(); break;
202 case Shader::OPCODE_CMP0: CMP(d, s0, s1, s2); break;
203 case Shader::OPCODE_BEM: BEM(d, s0, s1, dst.index); break;
204 case Shader::OPCODE_PHASE: break;
205 case Shader::OPCODE_END: break;
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400206 default:
207 ASSERT(false);
208 }
209
210 if(dst.type != Shader::PARAMETER_VOID && opcode != Shader::OPCODE_TEXKILL)
211 {
212 if(dst.shift > 0)
213 {
214 if(dst.mask & 0x1) { d.x = AddSat(d.x, d.x); if(dst.shift > 1) d.x = AddSat(d.x, d.x); if(dst.shift > 2) d.x = AddSat(d.x, d.x); }
215 if(dst.mask & 0x2) { d.y = AddSat(d.y, d.y); if(dst.shift > 1) d.y = AddSat(d.y, d.y); if(dst.shift > 2) d.y = AddSat(d.y, d.y); }
216 if(dst.mask & 0x4) { d.z = AddSat(d.z, d.z); if(dst.shift > 1) d.z = AddSat(d.z, d.z); if(dst.shift > 2) d.z = AddSat(d.z, d.z); }
217 if(dst.mask & 0x8) { d.w = AddSat(d.w, d.w); if(dst.shift > 1) d.w = AddSat(d.w, d.w); if(dst.shift > 2) d.w = AddSat(d.w, d.w); }
218 }
219 else if(dst.shift < 0)
220 {
221 if(dst.mask & 0x1) d.x = d.x >> -dst.shift;
222 if(dst.mask & 0x2) d.y = d.y >> -dst.shift;
223 if(dst.mask & 0x4) d.z = d.z >> -dst.shift;
224 if(dst.mask & 0x8) d.w = d.w >> -dst.shift;
225 }
226
227 if(dst.saturate)
228 {
229 if(dst.mask & 0x1) { d.x = Min(d.x, Short4(0x1000)); d.x = Max(d.x, Short4(0x0000, 0x0000, 0x0000, 0x0000)); }
230 if(dst.mask & 0x2) { d.y = Min(d.y, Short4(0x1000)); d.y = Max(d.y, Short4(0x0000, 0x0000, 0x0000, 0x0000)); }
231 if(dst.mask & 0x4) { d.z = Min(d.z, Short4(0x1000)); d.z = Max(d.z, Short4(0x0000, 0x0000, 0x0000, 0x0000)); }
232 if(dst.mask & 0x8) { d.w = Min(d.w, Short4(0x1000)); d.w = Max(d.w, Short4(0x0000, 0x0000, 0x0000, 0x0000)); }
233 }
234
235 if(pairing)
236 {
237 if(dst.mask & 0x1) dPairing.x = d.x;
238 if(dst.mask & 0x2) dPairing.y = d.y;
239 if(dst.mask & 0x4) dPairing.z = d.z;
240 if(dst.mask & 0x8) dPairing.w = d.w;
241 }
242
243 if(coissue)
244 {
245 const Dst &dst = shader->getInstruction(i - 1)->dst;
246
Nicolas Capens4f172c72016-01-13 08:34:30 -0500247 writeDestination(dPairing, dst);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400248 }
249
250 if(!pairing)
251 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500252 writeDestination(d, dst);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400253 }
254 }
255 }
Nicolas Capens8833e012016-01-01 23:47:52 -0500256 }
257
Nicolas Capens4f172c72016-01-13 08:34:30 -0500258 Bool PixelPipeline::alphaTest(Int cMask[4])
Nicolas Capens8833e012016-01-01 23:47:52 -0500259 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500260 current.x = Min(current.x, Short4(0x0FFF, 0x0FFF, 0x0FFF, 0x0FFF)); current.x = Max(current.x, Short4(0x0000, 0x0000, 0x0000, 0x0000));
261 current.y = Min(current.y, Short4(0x0FFF, 0x0FFF, 0x0FFF, 0x0FFF)); current.y = Max(current.y, Short4(0x0000, 0x0000, 0x0000, 0x0000));
262 current.z = Min(current.z, Short4(0x0FFF, 0x0FFF, 0x0FFF, 0x0FFF)); current.z = Max(current.z, Short4(0x0000, 0x0000, 0x0000, 0x0000));
263 current.w = Min(current.w, Short4(0x0FFF, 0x0FFF, 0x0FFF, 0x0FFF)); current.w = Max(current.w, Short4(0x0000, 0x0000, 0x0000, 0x0000));
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400264
265 if(!state.alphaTestActive())
266 {
267 return true;
268 }
269
270 Int aMask;
271
272 if(state.transparencyAntialiasing == TRANSPARENCY_NONE)
273 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500274 PixelRoutine::alphaTest(aMask, current.w);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400275
276 for(unsigned int q = 0; q < state.multiSample; q++)
277 {
278 cMask[q] &= aMask;
279 }
280 }
281 else if(state.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
282 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500283 Float4 alpha = Float4(current.w) * Float4(1.0f / 0x1000);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400284
Nicolas Capens4f172c72016-01-13 08:34:30 -0500285 alphaToCoverage(cMask, alpha);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400286 }
287 else ASSERT(false);
288
289 Int pass = cMask[0];
290
291 for(unsigned int q = 1; q < state.multiSample; q++)
292 {
293 pass = pass | cMask[q];
294 }
295
Nicolas Capens8833e012016-01-01 23:47:52 -0500296 return pass != 0x0;
297 }
298
Nicolas Capens4f172c72016-01-13 08:34:30 -0500299 void PixelPipeline::rasterOperation(Float4 &fog, Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4])
Nicolas Capens8833e012016-01-01 23:47:52 -0500300 {
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400301 if(!state.colorWriteActive(0))
302 {
303 return;
304 }
305
306 Vector4f oC;
307
308 switch(state.targetFormat[0])
309 {
310 case FORMAT_R5G6B5:
311 case FORMAT_X8R8G8B8:
312 case FORMAT_X8B8G8R8:
313 case FORMAT_A8R8G8B8:
314 case FORMAT_A8B8G8R8:
315 case FORMAT_A8:
316 case FORMAT_G16R16:
317 case FORMAT_A16B16G16R16:
318 if(!postBlendSRGB && state.writeSRGB)
319 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500320 linearToSRGB12_16(current);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400321 }
322 else
323 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500324 current.x <<= 4;
325 current.y <<= 4;
326 current.z <<= 4;
327 current.w <<= 4;
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400328 }
329
330 if(state.targetFormat[0] == FORMAT_R5G6B5)
331 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500332 current.x &= Short4(0xF800u);
333 current.y &= Short4(0xFC00u);
334 current.z &= Short4(0xF800u);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400335 }
336
Nicolas Capens4f172c72016-01-13 08:34:30 -0500337 fogBlend(current, fog);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400338
339 for(unsigned int q = 0; q < state.multiSample; q++)
340 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500341 Pointer<Byte> buffer = cBuffer[0] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[0]));
342 Vector4s color = current;
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400343
344 if(state.multiSampleMask & (1 << q))
345 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500346 alphaBlend(0, buffer, color, x);
347 logicOperation(0, buffer, color, x);
348 writeColor(0, buffer, x, color, sMask[q], zMask[q], cMask[q]);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400349 }
350 }
351 break;
352 case FORMAT_R32F:
353 case FORMAT_G32R32F:
354 case FORMAT_A32B32G32R32F:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500355 convertSigned12(oC, current);
356 PixelRoutine::fogBlend(oC, fog);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400357
358 for(unsigned int q = 0; q < state.multiSample; q++)
359 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500360 Pointer<Byte> buffer = cBuffer[0] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[0]));
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400361 Vector4f color = oC;
362
363 if(state.multiSampleMask & (1 << q))
364 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500365 alphaBlend(0, buffer, color, x);
366 writeColor(0, buffer, x, color, sMask[q], zMask[q], cMask[q]);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400367 }
368 }
369 break;
370 default:
371 ASSERT(false);
Nicolas Capens8833e012016-01-01 23:47:52 -0500372 }
373 }
374
Nicolas Capens4f172c72016-01-13 08:34:30 -0500375 void PixelPipeline::blendTexture(Vector4s &temp, Vector4s &texture, int stage)
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400376 {
377 Vector4s *arg1;
378 Vector4s *arg2;
379 Vector4s *arg3;
380 Vector4s res;
381
382 Vector4s constant;
383 Vector4s tfactor;
384
385 const TextureStage::State &textureStage = state.textureStage[stage];
386
387 if(textureStage.firstArgument == TextureStage::SOURCE_CONSTANT ||
388 textureStage.firstArgumentAlpha == TextureStage::SOURCE_CONSTANT ||
389 textureStage.secondArgument == TextureStage::SOURCE_CONSTANT ||
390 textureStage.secondArgumentAlpha == TextureStage::SOURCE_CONSTANT ||
391 textureStage.thirdArgument == TextureStage::SOURCE_CONSTANT ||
392 textureStage.thirdArgumentAlpha == TextureStage::SOURCE_CONSTANT)
393 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500394 constant.x = *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].constantColor4[0]));
395 constant.y = *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].constantColor4[1]));
396 constant.z = *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].constantColor4[2]));
397 constant.w = *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].constantColor4[3]));
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400398 }
399
400 if(textureStage.firstArgument == TextureStage::SOURCE_TFACTOR ||
401 textureStage.firstArgumentAlpha == TextureStage::SOURCE_TFACTOR ||
402 textureStage.secondArgument == TextureStage::SOURCE_TFACTOR ||
403 textureStage.secondArgumentAlpha == TextureStage::SOURCE_TFACTOR ||
404 textureStage.thirdArgument == TextureStage::SOURCE_TFACTOR ||
405 textureStage.thirdArgumentAlpha == TextureStage::SOURCE_TFACTOR)
406 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500407 tfactor.x = *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[0]));
408 tfactor.y = *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[1]));
409 tfactor.z = *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[2]));
410 tfactor.w = *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[3]));
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400411 }
412
413 // Premodulate
414 if(stage > 0 && textureStage.usesTexture)
415 {
416 if(state.textureStage[stage - 1].stageOperation == TextureStage::STAGE_PREMODULATE)
417 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500418 current.x = MulHigh(current.x, texture.x) << 4;
419 current.y = MulHigh(current.y, texture.y) << 4;
420 current.z = MulHigh(current.z, texture.z) << 4;
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400421 }
422
423 if(state.textureStage[stage - 1].stageOperationAlpha == TextureStage::STAGE_PREMODULATE)
424 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500425 current.w = MulHigh(current.w, texture.w) << 4;
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400426 }
427 }
428
429 if(luminance)
430 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500431 texture.x = MulHigh(texture.x, L) << 4;
432 texture.y = MulHigh(texture.y, L) << 4;
433 texture.z = MulHigh(texture.z, L) << 4;
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400434
435 luminance = false;
436 }
437
438 switch(textureStage.firstArgument)
439 {
440 case TextureStage::SOURCE_TEXTURE: arg1 = &texture; break;
441 case TextureStage::SOURCE_CONSTANT: arg1 = &constant; break;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500442 case TextureStage::SOURCE_CURRENT: arg1 = &current; break;
443 case TextureStage::SOURCE_DIFFUSE: arg1 = &diffuse; break;
444 case TextureStage::SOURCE_SPECULAR: arg1 = &specular; break;
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400445 case TextureStage::SOURCE_TEMP: arg1 = &temp; break;
446 case TextureStage::SOURCE_TFACTOR: arg1 = &tfactor; break;
447 default:
448 ASSERT(false);
449 }
450
451 switch(textureStage.secondArgument)
452 {
453 case TextureStage::SOURCE_TEXTURE: arg2 = &texture; break;
454 case TextureStage::SOURCE_CONSTANT: arg2 = &constant; break;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500455 case TextureStage::SOURCE_CURRENT: arg2 = &current; break;
456 case TextureStage::SOURCE_DIFFUSE: arg2 = &diffuse; break;
457 case TextureStage::SOURCE_SPECULAR: arg2 = &specular; break;
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400458 case TextureStage::SOURCE_TEMP: arg2 = &temp; break;
459 case TextureStage::SOURCE_TFACTOR: arg2 = &tfactor; break;
460 default:
461 ASSERT(false);
462 }
463
464 switch(textureStage.thirdArgument)
465 {
466 case TextureStage::SOURCE_TEXTURE: arg3 = &texture; break;
467 case TextureStage::SOURCE_CONSTANT: arg3 = &constant; break;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500468 case TextureStage::SOURCE_CURRENT: arg3 = &current; break;
469 case TextureStage::SOURCE_DIFFUSE: arg3 = &diffuse; break;
470 case TextureStage::SOURCE_SPECULAR: arg3 = &specular; break;
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400471 case TextureStage::SOURCE_TEMP: arg3 = &temp; break;
472 case TextureStage::SOURCE_TFACTOR: arg3 = &tfactor; break;
473 default:
474 ASSERT(false);
475 }
476
477 Vector4s mod1;
478 Vector4s mod2;
479 Vector4s mod3;
480
481 switch(textureStage.firstModifier)
482 {
483 case TextureStage::MODIFIER_COLOR:
484 break;
485 case TextureStage::MODIFIER_INVCOLOR:
486 mod1.x = SubSat(Short4(0x1000), arg1->x);
487 mod1.y = SubSat(Short4(0x1000), arg1->y);
488 mod1.z = SubSat(Short4(0x1000), arg1->z);
489 mod1.w = SubSat(Short4(0x1000), arg1->w);
490
491 arg1 = &mod1;
492 break;
493 case TextureStage::MODIFIER_ALPHA:
494 mod1.x = arg1->w;
495 mod1.y = arg1->w;
496 mod1.z = arg1->w;
497 mod1.w = arg1->w;
498
499 arg1 = &mod1;
500 break;
501 case TextureStage::MODIFIER_INVALPHA:
502 mod1.x = SubSat(Short4(0x1000), arg1->w);
503 mod1.y = SubSat(Short4(0x1000), arg1->w);
504 mod1.z = SubSat(Short4(0x1000), arg1->w);
505 mod1.w = SubSat(Short4(0x1000), arg1->w);
506
507 arg1 = &mod1;
508 break;
509 default:
510 ASSERT(false);
511 }
512
513 switch(textureStage.secondModifier)
514 {
515 case TextureStage::MODIFIER_COLOR:
516 break;
517 case TextureStage::MODIFIER_INVCOLOR:
518 mod2.x = SubSat(Short4(0x1000), arg2->x);
519 mod2.y = SubSat(Short4(0x1000), arg2->y);
520 mod2.z = SubSat(Short4(0x1000), arg2->z);
521 mod2.w = SubSat(Short4(0x1000), arg2->w);
522
523 arg2 = &mod2;
524 break;
525 case TextureStage::MODIFIER_ALPHA:
526 mod2.x = arg2->w;
527 mod2.y = arg2->w;
528 mod2.z = arg2->w;
529 mod2.w = arg2->w;
530
531 arg2 = &mod2;
532 break;
533 case TextureStage::MODIFIER_INVALPHA:
534 mod2.x = SubSat(Short4(0x1000), arg2->w);
535 mod2.y = SubSat(Short4(0x1000), arg2->w);
536 mod2.z = SubSat(Short4(0x1000), arg2->w);
537 mod2.w = SubSat(Short4(0x1000), arg2->w);
538
539 arg2 = &mod2;
540 break;
541 default:
542 ASSERT(false);
543 }
544
545 switch(textureStage.thirdModifier)
546 {
547 case TextureStage::MODIFIER_COLOR:
548 break;
549 case TextureStage::MODIFIER_INVCOLOR:
550 mod3.x = SubSat(Short4(0x1000), arg3->x);
551 mod3.y = SubSat(Short4(0x1000), arg3->y);
552 mod3.z = SubSat(Short4(0x1000), arg3->z);
553 mod3.w = SubSat(Short4(0x1000), arg3->w);
554
555 arg3 = &mod3;
556 break;
557 case TextureStage::MODIFIER_ALPHA:
558 mod3.x = arg3->w;
559 mod3.y = arg3->w;
560 mod3.z = arg3->w;
561 mod3.w = arg3->w;
562
563 arg3 = &mod3;
564 break;
565 case TextureStage::MODIFIER_INVALPHA:
566 mod3.x = SubSat(Short4(0x1000), arg3->w);
567 mod3.y = SubSat(Short4(0x1000), arg3->w);
568 mod3.z = SubSat(Short4(0x1000), arg3->w);
569 mod3.w = SubSat(Short4(0x1000), arg3->w);
570
571 arg3 = &mod3;
572 break;
573 default:
574 ASSERT(false);
575 }
576
577 switch(textureStage.stageOperation)
578 {
579 case TextureStage::STAGE_DISABLE:
580 break;
581 case TextureStage::STAGE_SELECTARG1: // Arg1
582 res.x = arg1->x;
583 res.y = arg1->y;
584 res.z = arg1->z;
585 break;
586 case TextureStage::STAGE_SELECTARG2: // Arg2
587 res.x = arg2->x;
588 res.y = arg2->y;
589 res.z = arg2->z;
590 break;
591 case TextureStage::STAGE_SELECTARG3: // Arg3
592 res.x = arg3->x;
593 res.y = arg3->y;
594 res.z = arg3->z;
595 break;
596 case TextureStage::STAGE_MODULATE: // Arg1 * Arg2
597 res.x = MulHigh(arg1->x, arg2->x) << 4;
598 res.y = MulHigh(arg1->y, arg2->y) << 4;
599 res.z = MulHigh(arg1->z, arg2->z) << 4;
600 break;
601 case TextureStage::STAGE_MODULATE2X: // Arg1 * Arg2 * 2
602 res.x = MulHigh(arg1->x, arg2->x) << 5;
603 res.y = MulHigh(arg1->y, arg2->y) << 5;
604 res.z = MulHigh(arg1->z, arg2->z) << 5;
605 break;
606 case TextureStage::STAGE_MODULATE4X: // Arg1 * Arg2 * 4
607 res.x = MulHigh(arg1->x, arg2->x) << 6;
608 res.y = MulHigh(arg1->y, arg2->y) << 6;
609 res.z = MulHigh(arg1->z, arg2->z) << 6;
610 break;
611 case TextureStage::STAGE_ADD: // Arg1 + Arg2
612 res.x = AddSat(arg1->x, arg2->x);
613 res.y = AddSat(arg1->y, arg2->y);
614 res.z = AddSat(arg1->z, arg2->z);
615 break;
616 case TextureStage::STAGE_ADDSIGNED: // Arg1 + Arg2 - 0.5
617 res.x = AddSat(arg1->x, arg2->x);
618 res.y = AddSat(arg1->y, arg2->y);
619 res.z = AddSat(arg1->z, arg2->z);
620
621 res.x = SubSat(res.x, Short4(0x0800, 0x0800, 0x0800, 0x0800));
622 res.y = SubSat(res.y, Short4(0x0800, 0x0800, 0x0800, 0x0800));
623 res.z = SubSat(res.z, Short4(0x0800, 0x0800, 0x0800, 0x0800));
624 break;
625 case TextureStage::STAGE_ADDSIGNED2X: // (Arg1 + Arg2 - 0.5) << 1
626 res.x = AddSat(arg1->x, arg2->x);
627 res.y = AddSat(arg1->y, arg2->y);
628 res.z = AddSat(arg1->z, arg2->z);
629
630 res.x = SubSat(res.x, Short4(0x0800, 0x0800, 0x0800, 0x0800));
631 res.y = SubSat(res.y, Short4(0x0800, 0x0800, 0x0800, 0x0800));
632 res.z = SubSat(res.z, Short4(0x0800, 0x0800, 0x0800, 0x0800));
633
634 res.x = AddSat(res.x, res.x);
635 res.y = AddSat(res.y, res.y);
636 res.z = AddSat(res.z, res.z);
637 break;
638 case TextureStage::STAGE_SUBTRACT: // Arg1 - Arg2
639 res.x = SubSat(arg1->x, arg2->x);
640 res.y = SubSat(arg1->y, arg2->y);
641 res.z = SubSat(arg1->z, arg2->z);
642 break;
643 case TextureStage::STAGE_ADDSMOOTH: // Arg1 + Arg2 - Arg1 * Arg2
644 {
645 Short4 tmp;
646
647 tmp = MulHigh(arg1->x, arg2->x) << 4; res.x = AddSat(arg1->x, arg2->x); res.x = SubSat(res.x, tmp);
648 tmp = MulHigh(arg1->y, arg2->y) << 4; res.y = AddSat(arg1->y, arg2->y); res.y = SubSat(res.y, tmp);
649 tmp = MulHigh(arg1->z, arg2->z) << 4; res.z = AddSat(arg1->z, arg2->z); res.z = SubSat(res.z, tmp);
650 }
651 break;
652 case TextureStage::STAGE_MULTIPLYADD: // Arg3 + Arg1 * Arg2
653 res.x = MulHigh(arg1->x, arg2->x) << 4; res.x = AddSat(res.x, arg3->x);
654 res.y = MulHigh(arg1->y, arg2->y) << 4; res.y = AddSat(res.y, arg3->y);
655 res.z = MulHigh(arg1->z, arg2->z) << 4; res.z = AddSat(res.z, arg3->z);
656 break;
657 case TextureStage::STAGE_LERP: // Arg3 * (Arg1 - Arg2) + Arg2
658 res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, arg3->x) << 4; res.x = AddSat(res.x, arg2->x);
659 res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, arg3->y) << 4; res.y = AddSat(res.y, arg2->y);
660 res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, arg3->z) << 4; res.z = AddSat(res.z, arg2->z);
661 break;
662 case TextureStage::STAGE_DOT3: // 2 * (Arg1.x - 0.5) * 2 * (Arg2.x - 0.5) + 2 * (Arg1.y - 0.5) * 2 * (Arg2.y - 0.5) + 2 * (Arg1.z - 0.5) * 2 * (Arg2.z - 0.5)
663 {
664 Short4 tmp;
665
666 res.x = SubSat(arg1->x, Short4(0x0800, 0x0800, 0x0800, 0x0800)); tmp = SubSat(arg2->x, Short4(0x0800, 0x0800, 0x0800, 0x0800)); res.x = MulHigh(res.x, tmp);
667 res.y = SubSat(arg1->y, Short4(0x0800, 0x0800, 0x0800, 0x0800)); tmp = SubSat(arg2->y, Short4(0x0800, 0x0800, 0x0800, 0x0800)); res.y = MulHigh(res.y, tmp);
668 res.z = SubSat(arg1->z, Short4(0x0800, 0x0800, 0x0800, 0x0800)); tmp = SubSat(arg2->z, Short4(0x0800, 0x0800, 0x0800, 0x0800)); res.z = MulHigh(res.z, tmp);
669
670 res.x = res.x << 6;
671 res.y = res.y << 6;
672 res.z = res.z << 6;
673
674 res.x = AddSat(res.x, res.y);
675 res.x = AddSat(res.x, res.z);
676
677 // Clamp to [0, 1]
678 res.x = Max(res.x, Short4(0x0000, 0x0000, 0x0000, 0x0000));
679 res.x = Min(res.x, Short4(0x1000));
680
681 res.y = res.x;
682 res.z = res.x;
683 res.w = res.x;
684 }
685 break;
686 case TextureStage::STAGE_BLENDCURRENTALPHA: // Alpha * (Arg1 - Arg2) + Arg2
Nicolas Capens4f172c72016-01-13 08:34:30 -0500687 res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, current.w) << 4; res.x = AddSat(res.x, arg2->x);
688 res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, current.w) << 4; res.y = AddSat(res.y, arg2->y);
689 res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, current.w) << 4; res.z = AddSat(res.z, arg2->z);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400690 break;
691 case TextureStage::STAGE_BLENDDIFFUSEALPHA: // Alpha * (Arg1 - Arg2) + Arg2
Nicolas Capens4f172c72016-01-13 08:34:30 -0500692 res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, diffuse.w) << 4; res.x = AddSat(res.x, arg2->x);
693 res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, diffuse.w) << 4; res.y = AddSat(res.y, arg2->y);
694 res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, diffuse.w) << 4; res.z = AddSat(res.z, arg2->z);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400695 break;
696 case TextureStage::STAGE_BLENDFACTORALPHA: // Alpha * (Arg1 - Arg2) + Arg2
Nicolas Capens4f172c72016-01-13 08:34:30 -0500697 res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[3]))) << 4; res.x = AddSat(res.x, arg2->x);
698 res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[3]))) << 4; res.y = AddSat(res.y, arg2->y);
699 res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[3]))) << 4; res.z = AddSat(res.z, arg2->z);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400700 break;
701 case TextureStage::STAGE_BLENDTEXTUREALPHA: // Alpha * (Arg1 - Arg2) + Arg2
702 res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, texture.w) << 4; res.x = AddSat(res.x, arg2->x);
703 res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, texture.w) << 4; res.y = AddSat(res.y, arg2->y);
704 res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, texture.w) << 4; res.z = AddSat(res.z, arg2->z);
705 break;
706 case TextureStage::STAGE_BLENDTEXTUREALPHAPM: // Arg1 + Arg2 * (1 - Alpha)
707 res.x = SubSat(Short4(0x1000), texture.w); res.x = MulHigh(res.x, arg2->x) << 4; res.x = AddSat(res.x, arg1->x);
708 res.y = SubSat(Short4(0x1000), texture.w); res.y = MulHigh(res.y, arg2->y) << 4; res.y = AddSat(res.y, arg1->y);
709 res.z = SubSat(Short4(0x1000), texture.w); res.z = MulHigh(res.z, arg2->z) << 4; res.z = AddSat(res.z, arg1->z);
710 break;
711 case TextureStage::STAGE_PREMODULATE:
712 res.x = arg1->x;
713 res.y = arg1->y;
714 res.z = arg1->z;
715 break;
716 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR: // Arg1 + Arg1.w * Arg2
717 res.x = MulHigh(arg1->w, arg2->x) << 4; res.x = AddSat(res.x, arg1->x);
718 res.y = MulHigh(arg1->w, arg2->y) << 4; res.y = AddSat(res.y, arg1->y);
719 res.z = MulHigh(arg1->w, arg2->z) << 4; res.z = AddSat(res.z, arg1->z);
720 break;
721 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA: // Arg1 * Arg2 + Arg1.w
722 res.x = MulHigh(arg1->x, arg2->x) << 4; res.x = AddSat(res.x, arg1->w);
723 res.y = MulHigh(arg1->y, arg2->y) << 4; res.y = AddSat(res.y, arg1->w);
724 res.z = MulHigh(arg1->z, arg2->z) << 4; res.z = AddSat(res.z, arg1->w);
725 break;
726 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR: // (1 - Arg1.w) * Arg2 + Arg1
727 {
728 Short4 tmp;
729
730 res.x = AddSat(arg1->x, arg2->x); tmp = MulHigh(arg1->w, arg2->x) << 4; res.x = SubSat(res.x, tmp);
731 res.y = AddSat(arg1->y, arg2->y); tmp = MulHigh(arg1->w, arg2->y) << 4; res.y = SubSat(res.y, tmp);
732 res.z = AddSat(arg1->z, arg2->z); tmp = MulHigh(arg1->w, arg2->z) << 4; res.z = SubSat(res.z, tmp);
733 }
734 break;
735 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA: // (1 - Arg1) * Arg2 + Arg1.w
736 {
737 Short4 tmp;
738
739 res.x = AddSat(arg1->w, arg2->x); tmp = MulHigh(arg1->x, arg2->x) << 4; res.x = SubSat(res.x, tmp);
740 res.y = AddSat(arg1->w, arg2->y); tmp = MulHigh(arg1->y, arg2->y) << 4; res.y = SubSat(res.y, tmp);
741 res.z = AddSat(arg1->w, arg2->z); tmp = MulHigh(arg1->z, arg2->z) << 4; res.z = SubSat(res.z, tmp);
742 }
743 break;
744 case TextureStage::STAGE_BUMPENVMAP:
745 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500746 du = Float4(texture.x) * Float4(1.0f / 0x0FE0);
747 dv = Float4(texture.y) * Float4(1.0f / 0x0FE0);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400748
749 Float4 du2;
750 Float4 dv2;
751
Nicolas Capens4f172c72016-01-13 08:34:30 -0500752 du2 = du;
753 dv2 = dv;
754 du *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][0]));
755 dv2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][0]));
756 du += dv2;
757 dv *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][1]));
758 du2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][1]));
759 dv += du2;
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400760
761 perturbate = true;
762
Nicolas Capens4f172c72016-01-13 08:34:30 -0500763 res.x = current.x;
764 res.y = current.y;
765 res.z = current.z;
766 res.w = current.w;
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400767 }
768 break;
769 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
770 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500771 du = Float4(texture.x) * Float4(1.0f / 0x0FE0);
772 dv = Float4(texture.y) * Float4(1.0f / 0x0FE0);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400773
774 Float4 du2;
775 Float4 dv2;
776
Nicolas Capens4f172c72016-01-13 08:34:30 -0500777 du2 = du;
778 dv2 = dv;
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400779
Nicolas Capens4f172c72016-01-13 08:34:30 -0500780 du *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][0]));
781 dv2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][0]));
782 du += dv2;
783 dv *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][1]));
784 du2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][1]));
785 dv += du2;
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400786
787 perturbate = true;
788
Nicolas Capens4f172c72016-01-13 08:34:30 -0500789 L = texture.z;
790 L = MulHigh(L, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].luminanceScale4)));
791 L = L << 4;
792 L = AddSat(L, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].luminanceOffset4)));
793 L = Max(L, Short4(0x0000, 0x0000, 0x0000, 0x0000));
794 L = Min(L, Short4(0x1000));
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400795
796 luminance = true;
797
Nicolas Capens4f172c72016-01-13 08:34:30 -0500798 res.x = current.x;
799 res.y = current.y;
800 res.z = current.z;
801 res.w = current.w;
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400802 }
803 break;
804 default:
805 ASSERT(false);
806 }
807
808 if(textureStage.stageOperation != TextureStage::STAGE_DOT3)
809 {
810 switch(textureStage.firstArgumentAlpha)
811 {
812 case TextureStage::SOURCE_TEXTURE: arg1 = &texture; break;
813 case TextureStage::SOURCE_CONSTANT: arg1 = &constant; break;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500814 case TextureStage::SOURCE_CURRENT: arg1 = &current; break;
815 case TextureStage::SOURCE_DIFFUSE: arg1 = &diffuse; break;
816 case TextureStage::SOURCE_SPECULAR: arg1 = &specular; break;
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400817 case TextureStage::SOURCE_TEMP: arg1 = &temp; break;
818 case TextureStage::SOURCE_TFACTOR: arg1 = &tfactor; break;
819 default:
820 ASSERT(false);
821 }
822
823 switch(textureStage.secondArgumentAlpha)
824 {
825 case TextureStage::SOURCE_TEXTURE: arg2 = &texture; break;
826 case TextureStage::SOURCE_CONSTANT: arg2 = &constant; break;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500827 case TextureStage::SOURCE_CURRENT: arg2 = &current; break;
828 case TextureStage::SOURCE_DIFFUSE: arg2 = &diffuse; break;
829 case TextureStage::SOURCE_SPECULAR: arg2 = &specular; break;
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400830 case TextureStage::SOURCE_TEMP: arg2 = &temp; break;
831 case TextureStage::SOURCE_TFACTOR: arg2 = &tfactor; break;
832 default:
833 ASSERT(false);
834 }
835
836 switch(textureStage.thirdArgumentAlpha)
837 {
838 case TextureStage::SOURCE_TEXTURE: arg3 = &texture; break;
839 case TextureStage::SOURCE_CONSTANT: arg3 = &constant; break;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500840 case TextureStage::SOURCE_CURRENT: arg3 = &current; break;
841 case TextureStage::SOURCE_DIFFUSE: arg3 = &diffuse; break;
842 case TextureStage::SOURCE_SPECULAR: arg3 = &specular; break;
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400843 case TextureStage::SOURCE_TEMP: arg3 = &temp; break;
844 case TextureStage::SOURCE_TFACTOR: arg3 = &tfactor; break;
845 default:
846 ASSERT(false);
847 }
848
849 switch(textureStage.firstModifierAlpha) // FIXME: Check if actually used
850 {
851 case TextureStage::MODIFIER_COLOR:
852 break;
853 case TextureStage::MODIFIER_INVCOLOR:
854 mod1.w = SubSat(Short4(0x1000), arg1->w);
855
856 arg1 = &mod1;
857 break;
858 case TextureStage::MODIFIER_ALPHA:
859 // Redundant
860 break;
861 case TextureStage::MODIFIER_INVALPHA:
862 mod1.w = SubSat(Short4(0x1000), arg1->w);
863
864 arg1 = &mod1;
865 break;
866 default:
867 ASSERT(false);
868 }
869
870 switch(textureStage.secondModifierAlpha) // FIXME: Check if actually used
871 {
872 case TextureStage::MODIFIER_COLOR:
873 break;
874 case TextureStage::MODIFIER_INVCOLOR:
875 mod2.w = SubSat(Short4(0x1000), arg2->w);
876
877 arg2 = &mod2;
878 break;
879 case TextureStage::MODIFIER_ALPHA:
880 // Redundant
881 break;
882 case TextureStage::MODIFIER_INVALPHA:
883 mod2.w = SubSat(Short4(0x1000), arg2->w);
884
885 arg2 = &mod2;
886 break;
887 default:
888 ASSERT(false);
889 }
890
891 switch(textureStage.thirdModifierAlpha) // FIXME: Check if actually used
892 {
893 case TextureStage::MODIFIER_COLOR:
894 break;
895 case TextureStage::MODIFIER_INVCOLOR:
896 mod3.w = SubSat(Short4(0x1000), arg3->w);
897
898 arg3 = &mod3;
899 break;
900 case TextureStage::MODIFIER_ALPHA:
901 // Redundant
902 break;
903 case TextureStage::MODIFIER_INVALPHA:
904 mod3.w = SubSat(Short4(0x1000), arg3->w);
905
906 arg3 = &mod3;
907 break;
908 default:
909 ASSERT(false);
910 }
911
912 switch(textureStage.stageOperationAlpha)
913 {
914 case TextureStage::STAGE_DISABLE:
915 break;
916 case TextureStage::STAGE_SELECTARG1: // Arg1
917 res.w = arg1->w;
918 break;
919 case TextureStage::STAGE_SELECTARG2: // Arg2
920 res.w = arg2->w;
921 break;
922 case TextureStage::STAGE_SELECTARG3: // Arg3
923 res.w = arg3->w;
924 break;
925 case TextureStage::STAGE_MODULATE: // Arg1 * Arg2
926 res.w = MulHigh(arg1->w, arg2->w) << 4;
927 break;
928 case TextureStage::STAGE_MODULATE2X: // Arg1 * Arg2 * 2
929 res.w = MulHigh(arg1->w, arg2->w) << 5;
930 break;
931 case TextureStage::STAGE_MODULATE4X: // Arg1 * Arg2 * 4
932 res.w = MulHigh(arg1->w, arg2->w) << 6;
933 break;
934 case TextureStage::STAGE_ADD: // Arg1 + Arg2
935 res.w = AddSat(arg1->w, arg2->w);
936 break;
937 case TextureStage::STAGE_ADDSIGNED: // Arg1 + Arg2 - 0.5
938 res.w = AddSat(arg1->w, arg2->w);
939 res.w = SubSat(res.w, Short4(0x0800, 0x0800, 0x0800, 0x0800));
940 break;
941 case TextureStage::STAGE_ADDSIGNED2X: // (Arg1 + Arg2 - 0.5) << 1
942 res.w = AddSat(arg1->w, arg2->w);
943 res.w = SubSat(res.w, Short4(0x0800, 0x0800, 0x0800, 0x0800));
944 res.w = AddSat(res.w, res.w);
945 break;
946 case TextureStage::STAGE_SUBTRACT: // Arg1 - Arg2
947 res.w = SubSat(arg1->w, arg2->w);
948 break;
949 case TextureStage::STAGE_ADDSMOOTH: // Arg1 + Arg2 - Arg1 * Arg2
950 {
951 Short4 tmp;
952
953 tmp = MulHigh(arg1->w, arg2->w) << 4; res.w = AddSat(arg1->w, arg2->w); res.w = SubSat(res.w, tmp);
954 }
955 break;
956 case TextureStage::STAGE_MULTIPLYADD: // Arg3 + Arg1 * Arg2
957 res.w = MulHigh(arg1->w, arg2->w) << 4; res.w = AddSat(res.w, arg3->w);
958 break;
959 case TextureStage::STAGE_LERP: // Arg3 * (Arg1 - Arg2) + Arg2
960 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, arg3->w) << 4; res.w = AddSat(res.w, arg2->w);
961 break;
962 case TextureStage::STAGE_DOT3:
963 break; // Already computed in color channel
964 case TextureStage::STAGE_BLENDCURRENTALPHA: // Alpha * (Arg1 - Arg2) + Arg2
Nicolas Capens4f172c72016-01-13 08:34:30 -0500965 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, current.w) << 4; res.w = AddSat(res.w, arg2->w);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400966 break;
967 case TextureStage::STAGE_BLENDDIFFUSEALPHA: // Arg1 * (Alpha) + Arg2 * (1 - Alpha)
Nicolas Capens4f172c72016-01-13 08:34:30 -0500968 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, diffuse.w) << 4; res.w = AddSat(res.w, arg2->w);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400969 break;
970 case TextureStage::STAGE_BLENDFACTORALPHA:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500971 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[3]))) << 4; res.w = AddSat(res.w, arg2->w);
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400972 break;
973 case TextureStage::STAGE_BLENDTEXTUREALPHA: // Arg1 * (Alpha) + Arg2 * (1 - Alpha)
974 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, texture.w) << 4; res.w = AddSat(res.w, arg2->w);
975 break;
976 case TextureStage::STAGE_BLENDTEXTUREALPHAPM: // Arg1 + Arg2 * (1 - Alpha)
977 res.w = SubSat(Short4(0x1000), texture.w); res.w = MulHigh(res.w, arg2->w) << 4; res.w = AddSat(res.w, arg1->w);
978 break;
979 case TextureStage::STAGE_PREMODULATE:
980 res.w = arg1->w;
981 break;
982 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
983 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
984 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
985 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
986 case TextureStage::STAGE_BUMPENVMAP:
987 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
988 break; // Invalid alpha operations
989 default:
990 ASSERT(false);
991 }
992 }
993
994 // Clamp result to [0, 1]
995
996 switch(textureStage.stageOperation)
997 {
998 case TextureStage::STAGE_DISABLE:
999 case TextureStage::STAGE_SELECTARG1:
1000 case TextureStage::STAGE_SELECTARG2:
1001 case TextureStage::STAGE_SELECTARG3:
1002 case TextureStage::STAGE_MODULATE:
1003 case TextureStage::STAGE_MODULATE2X:
1004 case TextureStage::STAGE_MODULATE4X:
1005 case TextureStage::STAGE_ADD:
1006 case TextureStage::STAGE_MULTIPLYADD:
1007 case TextureStage::STAGE_LERP:
1008 case TextureStage::STAGE_BLENDCURRENTALPHA:
1009 case TextureStage::STAGE_BLENDDIFFUSEALPHA:
1010 case TextureStage::STAGE_BLENDFACTORALPHA:
1011 case TextureStage::STAGE_BLENDTEXTUREALPHA:
1012 case TextureStage::STAGE_BLENDTEXTUREALPHAPM:
1013 case TextureStage::STAGE_DOT3: // Already clamped
1014 case TextureStage::STAGE_PREMODULATE:
1015 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
1016 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
1017 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
1018 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
1019 case TextureStage::STAGE_BUMPENVMAP:
1020 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
1021 if(state.textureStage[stage].cantUnderflow)
1022 {
1023 break; // Can't go below zero
1024 }
1025 case TextureStage::STAGE_ADDSIGNED:
1026 case TextureStage::STAGE_ADDSIGNED2X:
1027 case TextureStage::STAGE_SUBTRACT:
1028 case TextureStage::STAGE_ADDSMOOTH:
1029 res.x = Max(res.x, Short4(0x0000, 0x0000, 0x0000, 0x0000));
1030 res.y = Max(res.y, Short4(0x0000, 0x0000, 0x0000, 0x0000));
1031 res.z = Max(res.z, Short4(0x0000, 0x0000, 0x0000, 0x0000));
1032 break;
1033 default:
1034 ASSERT(false);
1035 }
1036
1037 switch(textureStage.stageOperationAlpha)
1038 {
1039 case TextureStage::STAGE_DISABLE:
1040 case TextureStage::STAGE_SELECTARG1:
1041 case TextureStage::STAGE_SELECTARG2:
1042 case TextureStage::STAGE_SELECTARG3:
1043 case TextureStage::STAGE_MODULATE:
1044 case TextureStage::STAGE_MODULATE2X:
1045 case TextureStage::STAGE_MODULATE4X:
1046 case TextureStage::STAGE_ADD:
1047 case TextureStage::STAGE_MULTIPLYADD:
1048 case TextureStage::STAGE_LERP:
1049 case TextureStage::STAGE_BLENDCURRENTALPHA:
1050 case TextureStage::STAGE_BLENDDIFFUSEALPHA:
1051 case TextureStage::STAGE_BLENDFACTORALPHA:
1052 case TextureStage::STAGE_BLENDTEXTUREALPHA:
1053 case TextureStage::STAGE_BLENDTEXTUREALPHAPM:
1054 case TextureStage::STAGE_DOT3: // Already clamped
1055 case TextureStage::STAGE_PREMODULATE:
1056 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
1057 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
1058 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
1059 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
1060 case TextureStage::STAGE_BUMPENVMAP:
1061 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
1062 if(state.textureStage[stage].cantUnderflow)
1063 {
1064 break; // Can't go below zero
1065 }
1066 case TextureStage::STAGE_ADDSIGNED:
1067 case TextureStage::STAGE_ADDSIGNED2X:
1068 case TextureStage::STAGE_SUBTRACT:
1069 case TextureStage::STAGE_ADDSMOOTH:
1070 res.w = Max(res.w, Short4(0x0000, 0x0000, 0x0000, 0x0000));
1071 break;
1072 default:
1073 ASSERT(false);
1074 }
1075
1076 switch(textureStage.stageOperation)
1077 {
1078 case TextureStage::STAGE_DISABLE:
1079 case TextureStage::STAGE_SELECTARG1:
1080 case TextureStage::STAGE_SELECTARG2:
1081 case TextureStage::STAGE_SELECTARG3:
1082 case TextureStage::STAGE_MODULATE:
1083 case TextureStage::STAGE_SUBTRACT:
1084 case TextureStage::STAGE_ADDSMOOTH:
1085 case TextureStage::STAGE_LERP:
1086 case TextureStage::STAGE_BLENDCURRENTALPHA:
1087 case TextureStage::STAGE_BLENDDIFFUSEALPHA:
1088 case TextureStage::STAGE_BLENDFACTORALPHA:
1089 case TextureStage::STAGE_BLENDTEXTUREALPHA:
1090 case TextureStage::STAGE_DOT3: // Already clamped
1091 case TextureStage::STAGE_PREMODULATE:
1092 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
1093 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
1094 case TextureStage::STAGE_BUMPENVMAP:
1095 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
1096 break; // Can't go above one
1097 case TextureStage::STAGE_MODULATE2X:
1098 case TextureStage::STAGE_MODULATE4X:
1099 case TextureStage::STAGE_ADD:
1100 case TextureStage::STAGE_ADDSIGNED:
1101 case TextureStage::STAGE_ADDSIGNED2X:
1102 case TextureStage::STAGE_MULTIPLYADD:
1103 case TextureStage::STAGE_BLENDTEXTUREALPHAPM:
1104 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
1105 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
1106 res.x = Min(res.x, Short4(0x1000));
1107 res.y = Min(res.y, Short4(0x1000));
1108 res.z = Min(res.z, Short4(0x1000));
1109 break;
1110 default:
1111 ASSERT(false);
1112 }
1113
1114 switch(textureStage.stageOperationAlpha)
1115 {
1116 case TextureStage::STAGE_DISABLE:
1117 case TextureStage::STAGE_SELECTARG1:
1118 case TextureStage::STAGE_SELECTARG2:
1119 case TextureStage::STAGE_SELECTARG3:
1120 case TextureStage::STAGE_MODULATE:
1121 case TextureStage::STAGE_SUBTRACT:
1122 case TextureStage::STAGE_ADDSMOOTH:
1123 case TextureStage::STAGE_LERP:
1124 case TextureStage::STAGE_BLENDCURRENTALPHA:
1125 case TextureStage::STAGE_BLENDDIFFUSEALPHA:
1126 case TextureStage::STAGE_BLENDFACTORALPHA:
1127 case TextureStage::STAGE_BLENDTEXTUREALPHA:
1128 case TextureStage::STAGE_DOT3: // Already clamped
1129 case TextureStage::STAGE_PREMODULATE:
1130 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
1131 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
1132 case TextureStage::STAGE_BUMPENVMAP:
1133 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
1134 break; // Can't go above one
1135 case TextureStage::STAGE_MODULATE2X:
1136 case TextureStage::STAGE_MODULATE4X:
1137 case TextureStage::STAGE_ADD:
1138 case TextureStage::STAGE_ADDSIGNED:
1139 case TextureStage::STAGE_ADDSIGNED2X:
1140 case TextureStage::STAGE_MULTIPLYADD:
1141 case TextureStage::STAGE_BLENDTEXTUREALPHAPM:
1142 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
1143 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
1144 res.w = Min(res.w, Short4(0x1000));
1145 break;
1146 default:
1147 ASSERT(false);
1148 }
1149
1150 switch(textureStage.destinationArgument)
1151 {
1152 case TextureStage::DESTINATION_CURRENT:
Nicolas Capens4f172c72016-01-13 08:34:30 -05001153 current.x = res.x;
1154 current.y = res.y;
1155 current.z = res.z;
1156 current.w = res.w;
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001157 break;
1158 case TextureStage::DESTINATION_TEMP:
1159 temp.x = res.x;
1160 temp.y = res.y;
1161 temp.z = res.z;
1162 temp.w = res.w;
1163 break;
1164 default:
1165 ASSERT(false);
1166 }
Nicolas Capens8833e012016-01-01 23:47:52 -05001167 }
1168
Nicolas Capens4f172c72016-01-13 08:34:30 -05001169 void PixelPipeline::fogBlend(Vector4s &current, Float4 &f)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001170 {
1171 if(!state.fogActive)
1172 {
1173 return;
1174 }
1175
1176 if(state.pixelFogMode != FOG_NONE)
1177 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001178 pixelFog(f);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001179 }
1180
1181 UShort4 fog = convertFixed16(f, true);
1182
1183 current.x = As<Short4>(MulHigh(As<UShort4>(current.x), fog));
1184 current.y = As<Short4>(MulHigh(As<UShort4>(current.y), fog));
1185 current.z = As<Short4>(MulHigh(As<UShort4>(current.z), fog));
1186
1187 UShort4 invFog = UShort4(0xFFFFu) - fog;
1188
Nicolas Capens4f172c72016-01-13 08:34:30 -05001189 current.x += As<Short4>(MulHigh(invFog, *Pointer<UShort4>(data + OFFSET(DrawData, fog.color4[0]))));
1190 current.y += As<Short4>(MulHigh(invFog, *Pointer<UShort4>(data + OFFSET(DrawData, fog.color4[1]))));
1191 current.z += As<Short4>(MulHigh(invFog, *Pointer<UShort4>(data + OFFSET(DrawData, fog.color4[2]))));
Nicolas Capens8833e012016-01-01 23:47:52 -05001192 }
1193
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001194 void PixelPipeline::specularPixel(Vector4s &current, Vector4s &specular)
1195 {
1196 if(!state.specularAdd)
1197 {
1198 return;
1199 }
1200
1201 current.x = AddSat(current.x, specular.x);
1202 current.y = AddSat(current.y, specular.y);
1203 current.z = AddSat(current.z, specular.z);
Nicolas Capens8833e012016-01-01 23:47:52 -05001204 }
1205
Nicolas Capens4f172c72016-01-13 08:34:30 -05001206 void PixelPipeline::sampleTexture(Vector4s &c, int coordinates, int stage, bool project)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001207 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001208 Float4 x = v[2 + coordinates].x;
1209 Float4 y = v[2 + coordinates].y;
1210 Float4 z = v[2 + coordinates].z;
1211 Float4 w = v[2 + coordinates].w;
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001212
1213 if(perturbate)
1214 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001215 x += du;
1216 y += dv;
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001217
1218 perturbate = false;
1219 }
1220
Nicolas Capens4f172c72016-01-13 08:34:30 -05001221 sampleTexture(c, stage, x, y, z, w, project);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001222 }
1223
Nicolas Capensc2534f42016-04-04 11:13:24 -04001224 void PixelPipeline::sampleTexture(Vector4s &c, int stage, Float4 &u, Float4 &v, Float4 &w, Float4 &q, bool project)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001225 {
Nicolas Capensc2534f42016-04-04 11:13:24 -04001226 #if PERF_PROFILE
1227 Long texTime = Ticks();
1228 #endif
1229
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001230 Vector4f dsx;
1231 Vector4f dsy;
1232
Nicolas Capens4f172c72016-01-13 08:34:30 -05001233 Pointer<Byte> texture = data + OFFSET(DrawData, mipmap) + stage * sizeof(Texture);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001234
1235 if(!project)
1236 {
Nicolas Capensc2534f42016-04-04 11:13:24 -04001237 sampler[stage]->sampleTexture(texture, c, u, v, w, q, dsx, dsy);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001238 }
1239 else
1240 {
1241 Float4 rq = reciprocal(q);
1242
1243 Float4 u_q = u * rq;
1244 Float4 v_q = v * rq;
1245 Float4 w_q = w * rq;
1246
Nicolas Capensc2534f42016-04-04 11:13:24 -04001247 sampler[stage]->sampleTexture(texture, c, u_q, v_q, w_q, q, dsx, dsy);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001248 }
1249
Nicolas Capensc2534f42016-04-04 11:13:24 -04001250 #if PERF_PROFILE
1251 cycles[PERF_TEX] += Ticks() - texTime;
1252 #endif
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001253 }
Nicolas Capens8833e012016-01-01 23:47:52 -05001254
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001255 Short4 PixelPipeline::convertFixed12(RValue<Float4> cf)
1256 {
1257 return RoundShort4(cf * Float4(0x1000));
1258 }
1259
1260 void PixelPipeline::convertFixed12(Vector4s &cs, Vector4f &cf)
1261 {
1262 cs.x = convertFixed12(cf.x);
1263 cs.y = convertFixed12(cf.y);
1264 cs.z = convertFixed12(cf.z);
1265 cs.w = convertFixed12(cf.w);
1266 }
1267
1268 Float4 PixelPipeline::convertSigned12(Short4 &cs)
1269 {
1270 return Float4(cs) * Float4(1.0f / 0x0FFE);
1271 }
1272
1273 void PixelPipeline::convertSigned12(Vector4f &cf, Vector4s &cs)
1274 {
1275 cf.x = convertSigned12(cs.x);
1276 cf.y = convertSigned12(cs.y);
1277 cf.z = convertSigned12(cs.z);
1278 cf.w = convertSigned12(cs.w);
1279 }
Nicolas Capens8833e012016-01-01 23:47:52 -05001280
Nicolas Capens4f172c72016-01-13 08:34:30 -05001281 void PixelPipeline::writeDestination(Vector4s &d, const Dst &dst)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001282 {
1283 switch(dst.type)
1284 {
1285 case Shader::PARAMETER_TEMP:
Nicolas Capens4f172c72016-01-13 08:34:30 -05001286 if(dst.mask & 0x1) rs[dst.index].x = d.x;
1287 if(dst.mask & 0x2) rs[dst.index].y = d.y;
1288 if(dst.mask & 0x4) rs[dst.index].z = d.z;
1289 if(dst.mask & 0x8) rs[dst.index].w = d.w;
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001290 break;
1291 case Shader::PARAMETER_INPUT:
Nicolas Capens4f172c72016-01-13 08:34:30 -05001292 if(dst.mask & 0x1) vs[dst.index].x = d.x;
1293 if(dst.mask & 0x2) vs[dst.index].y = d.y;
1294 if(dst.mask & 0x4) vs[dst.index].z = d.z;
1295 if(dst.mask & 0x8) vs[dst.index].w = d.w;
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001296 break;
1297 case Shader::PARAMETER_CONST: ASSERT(false); break;
1298 case Shader::PARAMETER_TEXTURE:
Nicolas Capens4f172c72016-01-13 08:34:30 -05001299 if(dst.mask & 0x1) ts[dst.index].x = d.x;
1300 if(dst.mask & 0x2) ts[dst.index].y = d.y;
1301 if(dst.mask & 0x4) ts[dst.index].z = d.z;
1302 if(dst.mask & 0x8) ts[dst.index].w = d.w;
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001303 break;
1304 case Shader::PARAMETER_COLOROUT:
Nicolas Capens4f172c72016-01-13 08:34:30 -05001305 if(dst.mask & 0x1) vs[dst.index].x = d.x;
1306 if(dst.mask & 0x2) vs[dst.index].y = d.y;
1307 if(dst.mask & 0x4) vs[dst.index].z = d.z;
1308 if(dst.mask & 0x8) vs[dst.index].w = d.w;
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001309 break;
1310 default:
1311 ASSERT(false);
1312 }
1313 }
Nicolas Capens8833e012016-01-01 23:47:52 -05001314
Nicolas Capensc2534f42016-04-04 11:13:24 -04001315 Vector4s PixelPipeline::fetchRegister(const Src &src)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001316 {
1317 Vector4s *reg;
1318 int i = src.index;
1319
1320 Vector4s c;
1321
1322 if(src.type == Shader::PARAMETER_CONST)
1323 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001324 c.x = *Pointer<Short4>(data + OFFSET(DrawData, ps.cW[i][0]));
1325 c.y = *Pointer<Short4>(data + OFFSET(DrawData, ps.cW[i][1]));
1326 c.z = *Pointer<Short4>(data + OFFSET(DrawData, ps.cW[i][2]));
1327 c.w = *Pointer<Short4>(data + OFFSET(DrawData, ps.cW[i][3]));
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001328 }
1329
1330 switch(src.type)
1331 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001332 case Shader::PARAMETER_TEMP: reg = &rs[i]; break;
1333 case Shader::PARAMETER_INPUT: reg = &vs[i]; break;
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001334 case Shader::PARAMETER_CONST: reg = &c; break;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001335 case Shader::PARAMETER_TEXTURE: reg = &ts[i]; break;
1336 case Shader::PARAMETER_VOID: return rs[0]; // Dummy
1337 case Shader::PARAMETER_FLOAT4LITERAL: return rs[0]; // Dummy
1338 default: ASSERT(false); return rs[0];
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001339 }
1340
1341 const Short4 &x = (*reg)[(src.swizzle >> 0) & 0x3];
1342 const Short4 &y = (*reg)[(src.swizzle >> 2) & 0x3];
1343 const Short4 &z = (*reg)[(src.swizzle >> 4) & 0x3];
1344 const Short4 &w = (*reg)[(src.swizzle >> 6) & 0x3];
1345
1346 Vector4s mod;
1347
1348 switch(src.modifier)
1349 {
1350 case Shader::MODIFIER_NONE:
1351 mod.x = x;
1352 mod.y = y;
1353 mod.z = z;
1354 mod.w = w;
1355 break;
1356 case Shader::MODIFIER_BIAS:
1357 mod.x = SubSat(x, Short4(0x0800, 0x0800, 0x0800, 0x0800));
1358 mod.y = SubSat(y, Short4(0x0800, 0x0800, 0x0800, 0x0800));
1359 mod.z = SubSat(z, Short4(0x0800, 0x0800, 0x0800, 0x0800));
1360 mod.w = SubSat(w, Short4(0x0800, 0x0800, 0x0800, 0x0800));
1361 break;
1362 case Shader::MODIFIER_BIAS_NEGATE:
1363 mod.x = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), x);
1364 mod.y = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), y);
1365 mod.z = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), z);
1366 mod.w = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), w);
1367 break;
1368 case Shader::MODIFIER_COMPLEMENT:
1369 mod.x = SubSat(Short4(0x1000), x);
1370 mod.y = SubSat(Short4(0x1000), y);
1371 mod.z = SubSat(Short4(0x1000), z);
1372 mod.w = SubSat(Short4(0x1000), w);
1373 break;
1374 case Shader::MODIFIER_NEGATE:
1375 mod.x = -x;
1376 mod.y = -y;
1377 mod.z = -z;
1378 mod.w = -w;
1379 break;
1380 case Shader::MODIFIER_X2:
1381 mod.x = AddSat(x, x);
1382 mod.y = AddSat(y, y);
1383 mod.z = AddSat(z, z);
1384 mod.w = AddSat(w, w);
1385 break;
1386 case Shader::MODIFIER_X2_NEGATE:
1387 mod.x = -AddSat(x, x);
1388 mod.y = -AddSat(y, y);
1389 mod.z = -AddSat(z, z);
1390 mod.w = -AddSat(w, w);
1391 break;
1392 case Shader::MODIFIER_SIGN:
1393 mod.x = SubSat(x, Short4(0x0800, 0x0800, 0x0800, 0x0800));
1394 mod.y = SubSat(y, Short4(0x0800, 0x0800, 0x0800, 0x0800));
1395 mod.z = SubSat(z, Short4(0x0800, 0x0800, 0x0800, 0x0800));
1396 mod.w = SubSat(w, Short4(0x0800, 0x0800, 0x0800, 0x0800));
1397 mod.x = AddSat(mod.x, mod.x);
1398 mod.y = AddSat(mod.y, mod.y);
1399 mod.z = AddSat(mod.z, mod.z);
1400 mod.w = AddSat(mod.w, mod.w);
1401 break;
1402 case Shader::MODIFIER_SIGN_NEGATE:
1403 mod.x = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), x);
1404 mod.y = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), y);
1405 mod.z = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), z);
1406 mod.w = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), w);
1407 mod.x = AddSat(mod.x, mod.x);
1408 mod.y = AddSat(mod.y, mod.y);
1409 mod.z = AddSat(mod.z, mod.z);
1410 mod.w = AddSat(mod.w, mod.w);
1411 break;
1412 case Shader::MODIFIER_DZ:
1413 mod.x = x;
1414 mod.y = y;
1415 mod.z = z;
1416 mod.w = w;
1417 // Projection performed by texture sampler
1418 break;
1419 case Shader::MODIFIER_DW:
1420 mod.x = x;
1421 mod.y = y;
1422 mod.z = z;
1423 mod.w = w;
1424 // Projection performed by texture sampler
1425 break;
1426 default:
1427 ASSERT(false);
1428 }
1429
1430 if(src.type == Shader::PARAMETER_CONST && (src.modifier == Shader::MODIFIER_X2 || src.modifier == Shader::MODIFIER_X2_NEGATE))
1431 {
1432 mod.x = Min(mod.x, Short4(0x1000)); mod.x = Max(mod.x, Short4(-0x1000, -0x1000, -0x1000, -0x1000));
1433 mod.y = Min(mod.y, Short4(0x1000)); mod.y = Max(mod.y, Short4(-0x1000, -0x1000, -0x1000, -0x1000));
1434 mod.z = Min(mod.z, Short4(0x1000)); mod.z = Max(mod.z, Short4(-0x1000, -0x1000, -0x1000, -0x1000));
1435 mod.w = Min(mod.w, Short4(0x1000)); mod.w = Max(mod.w, Short4(-0x1000, -0x1000, -0x1000, -0x1000));
1436 }
1437
1438 return mod;
1439 }
Nicolas Capens8833e012016-01-01 23:47:52 -05001440
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001441 void PixelPipeline::MOV(Vector4s &dst, Vector4s &src0)
1442 {
1443 dst.x = src0.x;
1444 dst.y = src0.y;
1445 dst.z = src0.z;
1446 dst.w = src0.w;
1447 }
1448
1449 void PixelPipeline::ADD(Vector4s &dst, Vector4s &src0, Vector4s &src1)
1450 {
1451 dst.x = AddSat(src0.x, src1.x);
1452 dst.y = AddSat(src0.y, src1.y);
1453 dst.z = AddSat(src0.z, src1.z);
1454 dst.w = AddSat(src0.w, src1.w);
1455 }
1456
1457 void PixelPipeline::SUB(Vector4s &dst, Vector4s &src0, Vector4s &src1)
1458 {
1459 dst.x = SubSat(src0.x, src1.x);
1460 dst.y = SubSat(src0.y, src1.y);
1461 dst.z = SubSat(src0.z, src1.z);
1462 dst.w = SubSat(src0.w, src1.w);
1463 }
1464
1465 void PixelPipeline::MAD(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2)
1466 {
1467 // FIXME: Long fixed-point multiply fixup
1468 { dst.x = MulHigh(src0.x, src1.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, src2.x); }
1469 {
1470 dst.y = MulHigh(src0.y, src1.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, src2.y);
1471 }
1472 {dst.z = MulHigh(src0.z, src1.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, src2.z); }
1473 {dst.w = MulHigh(src0.w, src1.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, src2.w); }
1474 }
1475
1476 void PixelPipeline::MUL(Vector4s &dst, Vector4s &src0, Vector4s &src1)
1477 {
1478 // FIXME: Long fixed-point multiply fixup
1479 { dst.x = MulHigh(src0.x, src1.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); }
1480 {
1481 dst.y = MulHigh(src0.y, src1.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y);
1482 }
1483 {dst.z = MulHigh(src0.z, src1.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); }
1484 {dst.w = MulHigh(src0.w, src1.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); }
1485 }
1486
1487 void PixelPipeline::DP3(Vector4s &dst, Vector4s &src0, Vector4s &src1)
1488 {
1489 Short4 t0;
1490 Short4 t1;
1491
1492 // FIXME: Long fixed-point multiply fixup
1493 t0 = MulHigh(src0.x, src1.x); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0);
1494 t1 = MulHigh(src0.y, src1.y); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1);
1495 t0 = AddSat(t0, t1);
1496 t1 = MulHigh(src0.z, src1.z); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1);
1497 t0 = AddSat(t0, t1);
1498
1499 dst.x = t0;
1500 dst.y = t0;
1501 dst.z = t0;
1502 dst.w = t0;
1503 }
1504
1505 void PixelPipeline::DP4(Vector4s &dst, Vector4s &src0, Vector4s &src1)
1506 {
1507 Short4 t0;
1508 Short4 t1;
1509
1510 // FIXME: Long fixed-point multiply fixup
1511 t0 = MulHigh(src0.x, src1.x); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0);
1512 t1 = MulHigh(src0.y, src1.y); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1);
1513 t0 = AddSat(t0, t1);
1514 t1 = MulHigh(src0.z, src1.z); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1);
1515 t0 = AddSat(t0, t1);
1516 t1 = MulHigh(src0.w, src1.w); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1);
1517 t0 = AddSat(t0, t1);
1518
1519 dst.x = t0;
1520 dst.y = t0;
1521 dst.z = t0;
1522 dst.w = t0;
1523 }
1524
1525 void PixelPipeline::LRP(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2)
1526 {
1527 // FIXME: Long fixed-point multiply fixup
1528 { dst.x = SubSat(src1.x, src2.x); dst.x = MulHigh(dst.x, src0.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, src2.x); }
1529 {
1530 dst.y = SubSat(src1.y, src2.y); dst.y = MulHigh(dst.y, src0.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, src2.y);
1531 }
1532 {dst.z = SubSat(src1.z, src2.z); dst.z = MulHigh(dst.z, src0.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, src2.z); }
1533 {dst.w = SubSat(src1.w, src2.w); dst.w = MulHigh(dst.w, src0.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, src2.w); }
1534 }
1535
1536 void PixelPipeline::TEXCOORD(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int coordinate)
1537 {
1538 Float4 uw;
1539 Float4 vw;
1540 Float4 sw;
1541
1542 if(state.interpolant[2 + coordinate].component & 0x01)
1543 {
1544 uw = Max(u, Float4(0.0f));
1545 uw = Min(uw, Float4(1.0f));
1546 dst.x = convertFixed12(uw);
1547 }
1548 else
1549 {
1550 dst.x = Short4(0x0000, 0x0000, 0x0000, 0x0000);
1551 }
1552
1553 if(state.interpolant[2 + coordinate].component & 0x02)
1554 {
1555 vw = Max(v, Float4(0.0f));
1556 vw = Min(vw, Float4(1.0f));
1557 dst.y = convertFixed12(vw);
1558 }
1559 else
1560 {
1561 dst.y = Short4(0x0000, 0x0000, 0x0000, 0x0000);
1562 }
1563
1564 if(state.interpolant[2 + coordinate].component & 0x04)
1565 {
1566 sw = Max(s, Float4(0.0f));
1567 sw = Min(sw, Float4(1.0f));
1568 dst.z = convertFixed12(sw);
1569 }
1570 else
1571 {
1572 dst.z = Short4(0x0000, 0x0000, 0x0000, 0x0000);
1573 }
1574
1575 dst.w = Short4(0x1000);
1576 }
1577
1578 void PixelPipeline::TEXCRD(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int coordinate, bool project)
1579 {
1580 Float4 uw = u;
1581 Float4 vw = v;
1582 Float4 sw = s;
1583
1584 if(project)
1585 {
1586 uw *= Rcp_pp(s);
1587 vw *= Rcp_pp(s);
1588 }
1589
1590 if(state.interpolant[2 + coordinate].component & 0x01)
1591 {
1592 uw *= Float4(0x1000);
1593 uw = Max(uw, Float4(-0x8000));
1594 uw = Min(uw, Float4(0x7FFF));
1595 dst.x = RoundShort4(uw);
1596 }
1597 else
1598 {
1599 dst.x = Short4(0x0000);
1600 }
1601
1602 if(state.interpolant[2 + coordinate].component & 0x02)
1603 {
1604 vw *= Float4(0x1000);
1605 vw = Max(vw, Float4(-0x8000));
1606 vw = Min(vw, Float4(0x7FFF));
1607 dst.y = RoundShort4(vw);
1608 }
1609 else
1610 {
1611 dst.y = Short4(0x0000, 0x0000, 0x0000, 0x0000);
1612 }
1613
1614 if(state.interpolant[2 + coordinate].component & 0x04)
1615 {
1616 sw *= Float4(0x1000);
1617 sw = Max(sw, Float4(-0x8000));
1618 sw = Min(sw, Float4(0x7FFF));
1619 dst.z = RoundShort4(sw);
1620 }
1621 else
1622 {
1623 dst.z = Short4(0x0000, 0x0000, 0x0000, 0x0000);
1624 }
1625 }
1626
Nicolas Capens4f172c72016-01-13 08:34:30 -05001627 void PixelPipeline::TEXDP3(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, Vector4s &src)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001628 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001629 TEXM3X3PAD(u, v, s, src, 0, false);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001630
Nicolas Capens4f172c72016-01-13 08:34:30 -05001631 Short4 t0 = RoundShort4(u_ * Float4(0x1000));
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001632
1633 dst.x = t0;
1634 dst.y = t0;
1635 dst.z = t0;
1636 dst.w = t0;
1637 }
1638
Nicolas Capens4f172c72016-01-13 08:34:30 -05001639 void PixelPipeline::TEXDP3TEX(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001640 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001641 TEXM3X3PAD(u, v, s, src0, 0, false);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001642
Nicolas Capens4f172c72016-01-13 08:34:30 -05001643 v_ = Float4(0.0f);
1644 w_ = Float4(0.0f);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001645
Nicolas Capens4f172c72016-01-13 08:34:30 -05001646 sampleTexture(dst, stage, u_, v_, w_, w_);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001647 }
1648
1649 void PixelPipeline::TEXKILL(Int cMask[4], Float4 &u, Float4 &v, Float4 &s)
1650 {
1651 Int kill = SignMask(CmpNLT(u, Float4(0.0f))) &
1652 SignMask(CmpNLT(v, Float4(0.0f))) &
1653 SignMask(CmpNLT(s, Float4(0.0f)));
1654
1655 for(unsigned int q = 0; q < state.multiSample; q++)
1656 {
1657 cMask[q] &= kill;
1658 }
1659 }
1660
1661 void PixelPipeline::TEXKILL(Int cMask[4], Vector4s &src)
1662 {
1663 Short4 test = src.x | src.y | src.z;
1664 Int kill = SignMask(Pack(test, test)) ^ 0x0000000F;
1665
1666 for(unsigned int q = 0; q < state.multiSample; q++)
1667 {
1668 cMask[q] &= kill;
1669 }
1670 }
1671
Nicolas Capens4f172c72016-01-13 08:34:30 -05001672 void PixelPipeline::TEX(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int sampler, bool project)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001673 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001674 sampleTexture(dst, sampler, u, v, s, s, project);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001675 }
1676
Nicolas Capens4f172c72016-01-13 08:34:30 -05001677 void PixelPipeline::TEXLD(Vector4s &dst, Vector4s &src, int sampler, bool project)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001678 {
1679 Float4 u = Float4(src.x) * Float4(1.0f / 0x0FFE);
1680 Float4 v = Float4(src.y) * Float4(1.0f / 0x0FFE);
1681 Float4 s = Float4(src.z) * Float4(1.0f / 0x0FFE);
1682
Nicolas Capens4f172c72016-01-13 08:34:30 -05001683 sampleTexture(dst, sampler, u, v, s, s, project);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001684 }
1685
Nicolas Capens4f172c72016-01-13 08:34:30 -05001686 void PixelPipeline::TEXBEM(Vector4s &dst, Vector4s &src, Float4 &u, Float4 &v, Float4 &s, int stage)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001687 {
1688 Float4 du = Float4(src.x) * Float4(1.0f / 0x0FFE);
1689 Float4 dv = Float4(src.y) * Float4(1.0f / 0x0FFE);
1690
1691 Float4 du2 = du;
1692 Float4 dv2 = dv;
1693
Nicolas Capens4f172c72016-01-13 08:34:30 -05001694 du *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][0]));
1695 dv2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][0]));
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001696 du += dv2;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001697 dv *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][1]));
1698 du2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][1]));
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001699 dv += du2;
1700
1701 Float4 u_ = u + du;
1702 Float4 v_ = v + dv;
1703
Nicolas Capens4f172c72016-01-13 08:34:30 -05001704 sampleTexture(dst, stage, u_, v_, s, s);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001705 }
1706
Nicolas Capens4f172c72016-01-13 08:34:30 -05001707 void PixelPipeline::TEXBEML(Vector4s &dst, Vector4s &src, Float4 &u, Float4 &v, Float4 &s, int stage)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001708 {
1709 Float4 du = Float4(src.x) * Float4(1.0f / 0x0FFE);
1710 Float4 dv = Float4(src.y) * Float4(1.0f / 0x0FFE);
1711
1712 Float4 du2 = du;
1713 Float4 dv2 = dv;
1714
Nicolas Capens4f172c72016-01-13 08:34:30 -05001715 du *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][0]));
1716 dv2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][0]));
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001717 du += dv2;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001718 dv *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][1]));
1719 du2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][1]));
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001720 dv += du2;
1721
1722 Float4 u_ = u + du;
1723 Float4 v_ = v + dv;
1724
Nicolas Capens4f172c72016-01-13 08:34:30 -05001725 sampleTexture(dst, stage, u_, v_, s, s);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001726
1727 Short4 L;
1728
1729 L = src.z;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001730 L = MulHigh(L, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].luminanceScale4)));
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001731 L = L << 4;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001732 L = AddSat(L, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].luminanceOffset4)));
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001733 L = Max(L, Short4(0x0000, 0x0000, 0x0000, 0x0000));
1734 L = Min(L, Short4(0x1000));
1735
1736 dst.x = MulHigh(dst.x, L); dst.x = dst.x << 4;
1737 dst.y = MulHigh(dst.y, L); dst.y = dst.y << 4;
1738 dst.z = MulHigh(dst.z, L); dst.z = dst.z << 4;
1739 }
1740
Nicolas Capens4f172c72016-01-13 08:34:30 -05001741 void PixelPipeline::TEXREG2AR(Vector4s &dst, Vector4s &src0, int stage)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001742 {
1743 Float4 u = Float4(src0.w) * Float4(1.0f / 0x0FFE);
1744 Float4 v = Float4(src0.x) * Float4(1.0f / 0x0FFE);
1745 Float4 s = Float4(src0.z) * Float4(1.0f / 0x0FFE);
1746
Nicolas Capens4f172c72016-01-13 08:34:30 -05001747 sampleTexture(dst, stage, u, v, s, s);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001748 }
1749
Nicolas Capens4f172c72016-01-13 08:34:30 -05001750 void PixelPipeline::TEXREG2GB(Vector4s &dst, Vector4s &src0, int stage)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001751 {
1752 Float4 u = Float4(src0.y) * Float4(1.0f / 0x0FFE);
1753 Float4 v = Float4(src0.z) * Float4(1.0f / 0x0FFE);
1754 Float4 s = v;
1755
Nicolas Capens4f172c72016-01-13 08:34:30 -05001756 sampleTexture(dst, stage, u, v, s, s);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001757 }
1758
Nicolas Capens4f172c72016-01-13 08:34:30 -05001759 void PixelPipeline::TEXREG2RGB(Vector4s &dst, Vector4s &src0, int stage)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001760 {
1761 Float4 u = Float4(src0.x) * Float4(1.0f / 0x0FFE);
1762 Float4 v = Float4(src0.y) * Float4(1.0f / 0x0FFE);
1763 Float4 s = Float4(src0.z) * Float4(1.0f / 0x0FFE);
1764
Nicolas Capens4f172c72016-01-13 08:34:30 -05001765 sampleTexture(dst, stage, u, v, s, s);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001766 }
1767
Nicolas Capens4f172c72016-01-13 08:34:30 -05001768 void PixelPipeline::TEXM3X2DEPTH(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, Vector4s &src, bool signedScaling)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001769 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001770 TEXM3X2PAD(u, v, s, src, 1, signedScaling);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001771
1772 // z / w
Nicolas Capens4f172c72016-01-13 08:34:30 -05001773 u_ *= Rcp_pp(v_); // FIXME: Set result to 1.0 when division by zero
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001774
Nicolas Capens4f172c72016-01-13 08:34:30 -05001775 oDepth = u_;
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001776 }
1777
Nicolas Capens4f172c72016-01-13 08:34:30 -05001778 void PixelPipeline::TEXM3X2PAD(Float4 &u, Float4 &v, Float4 &s, Vector4s &src0, int component, bool signedScaling)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001779 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001780 TEXM3X3PAD(u, v, s, src0, component, signedScaling);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001781 }
1782
Nicolas Capens4f172c72016-01-13 08:34:30 -05001783 void PixelPipeline::TEXM3X2TEX(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0, bool signedScaling)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001784 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001785 TEXM3X2PAD(u, v, s, src0, 1, signedScaling);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001786
Nicolas Capens4f172c72016-01-13 08:34:30 -05001787 w_ = Float4(0.0f);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001788
Nicolas Capens4f172c72016-01-13 08:34:30 -05001789 sampleTexture(dst, stage, u_, v_, w_, w_);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001790 }
1791
Nicolas Capens4f172c72016-01-13 08:34:30 -05001792 void PixelPipeline::TEXM3X3(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, Vector4s &src0, bool signedScaling)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001793 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001794 TEXM3X3PAD(u, v, s, src0, 2, signedScaling);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001795
Nicolas Capens4f172c72016-01-13 08:34:30 -05001796 dst.x = RoundShort4(u_ * Float4(0x1000));
1797 dst.y = RoundShort4(v_ * Float4(0x1000));
1798 dst.z = RoundShort4(w_ * Float4(0x1000));
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001799 dst.w = Short4(0x1000);
1800 }
1801
Nicolas Capens4f172c72016-01-13 08:34:30 -05001802 void PixelPipeline::TEXM3X3PAD(Float4 &u, Float4 &v, Float4 &s, Vector4s &src0, int component, bool signedScaling)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001803 {
1804 if(component == 0 || previousScaling != signedScaling) // FIXME: Other source modifiers?
1805 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001806 U = Float4(src0.x);
1807 V = Float4(src0.y);
1808 W = Float4(src0.z);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001809
1810 previousScaling = signedScaling;
1811 }
1812
Nicolas Capens4f172c72016-01-13 08:34:30 -05001813 Float4 x = U * u + V * v + W * s;
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001814
1815 x *= Float4(1.0f / 0x1000);
1816
1817 switch(component)
1818 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001819 case 0: u_ = x; break;
1820 case 1: v_ = x; break;
1821 case 2: w_ = x; break;
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001822 default: ASSERT(false);
1823 }
1824 }
1825
Nicolas Capens4f172c72016-01-13 08:34:30 -05001826 void PixelPipeline::TEXM3X3SPEC(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0, Vector4s &src1)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001827 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001828 TEXM3X3PAD(u, v, s, src0, 2, false);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001829
1830 Float4 E[3]; // Eye vector
1831
1832 E[0] = Float4(src1.x) * Float4(1.0f / 0x0FFE);
1833 E[1] = Float4(src1.y) * Float4(1.0f / 0x0FFE);
1834 E[2] = Float4(src1.z) * Float4(1.0f / 0x0FFE);
1835
1836 // Reflection
1837 Float4 u__;
1838 Float4 v__;
1839 Float4 w__;
1840
1841 // (u'', v'', w'') = 2 * (N . E) * N - E * (N . N)
Nicolas Capens4f172c72016-01-13 08:34:30 -05001842 u__ = u_ * E[0];
1843 v__ = v_ * E[1];
1844 w__ = w_ * E[2];
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001845 u__ += v__ + w__;
1846 u__ += u__;
1847 v__ = u__;
1848 w__ = u__;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001849 u__ *= u_;
1850 v__ *= v_;
1851 w__ *= w_;
1852 u_ *= u_;
1853 v_ *= v_;
1854 w_ *= w_;
1855 u_ += v_ + w_;
1856 u__ -= E[0] * u_;
1857 v__ -= E[1] * u_;
1858 w__ -= E[2] * u_;
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001859
Nicolas Capens4f172c72016-01-13 08:34:30 -05001860 sampleTexture(dst, stage, u__, v__, w__, w__);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001861 }
1862
Nicolas Capens4f172c72016-01-13 08:34:30 -05001863 void PixelPipeline::TEXM3X3TEX(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0, bool signedScaling)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001864 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001865 TEXM3X3PAD(u, v, s, src0, 2, signedScaling);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001866
Nicolas Capens4f172c72016-01-13 08:34:30 -05001867 sampleTexture(dst, stage, u_, v_, w_, w_);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001868 }
1869
Nicolas Capens4f172c72016-01-13 08:34:30 -05001870 void PixelPipeline::TEXM3X3VSPEC(Vector4s &dst, Float4 &x, Float4 &y, Float4 &z, int stage, Vector4s &src0)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001871 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001872 TEXM3X3PAD(x, y, z, src0, 2, false);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001873
1874 Float4 E[3]; // Eye vector
1875
Nicolas Capens4f172c72016-01-13 08:34:30 -05001876 E[0] = v[2 + stage - 2].w;
1877 E[1] = v[2 + stage - 1].w;
1878 E[2] = v[2 + stage - 0].w;
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001879
1880 // Reflection
1881 Float4 u__;
1882 Float4 v__;
1883 Float4 w__;
1884
1885 // (u'', v'', w'') = 2 * (N . E) * N - E * (N . N)
Nicolas Capens4f172c72016-01-13 08:34:30 -05001886 u__ = u_ * E[0];
1887 v__ = v_ * E[1];
1888 w__ = w_ * E[2];
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001889 u__ += v__ + w__;
1890 u__ += u__;
1891 v__ = u__;
1892 w__ = u__;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001893 u__ *= u_;
1894 v__ *= v_;
1895 w__ *= w_;
1896 u_ *= u_;
1897 v_ *= v_;
1898 w_ *= w_;
1899 u_ += v_ + w_;
1900 u__ -= E[0] * u_;
1901 v__ -= E[1] * u_;
1902 w__ -= E[2] * u_;
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001903
Nicolas Capens4f172c72016-01-13 08:34:30 -05001904 sampleTexture(dst, stage, u__, v__, w__, w__);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001905 }
1906
Nicolas Capens4f172c72016-01-13 08:34:30 -05001907 void PixelPipeline::TEXDEPTH()
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001908 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001909 u_ = Float4(rs[5].x);
1910 v_ = Float4(rs[5].y);
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001911
1912 // z / w
Nicolas Capens4f172c72016-01-13 08:34:30 -05001913 u_ *= Rcp_pp(v_); // FIXME: Set result to 1.0 when division by zero
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001914
Nicolas Capens4f172c72016-01-13 08:34:30 -05001915 oDepth = u_;
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001916 }
1917
1918 void PixelPipeline::CND(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2)
1919 {
1920 { Short4 t0; t0 = src0.x; t0 = CmpGT(t0, Short4(0x0800, 0x0800, 0x0800, 0x0800)); Short4 t1; t1 = src1.x; t1 = t1 & t0; t0 = ~t0 & src2.x; t0 = t0 | t1; dst.x = t0; };
1921 {Short4 t0; t0 = src0.y; t0 = CmpGT(t0, Short4(0x0800, 0x0800, 0x0800, 0x0800)); Short4 t1; t1 = src1.y; t1 = t1 & t0; t0 = ~t0 & src2.y; t0 = t0 | t1; dst.y = t0; };
1922 {Short4 t0; t0 = src0.z; t0 = CmpGT(t0, Short4(0x0800, 0x0800, 0x0800, 0x0800)); Short4 t1; t1 = src1.z; t1 = t1 & t0; t0 = ~t0 & src2.z; t0 = t0 | t1; dst.z = t0; };
1923 {Short4 t0; t0 = src0.w; t0 = CmpGT(t0, Short4(0x0800, 0x0800, 0x0800, 0x0800)); Short4 t1; t1 = src1.w; t1 = t1 & t0; t0 = ~t0 & src2.w; t0 = t0 | t1; dst.w = t0; };
1924 }
1925
1926 void PixelPipeline::CMP(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2)
1927 {
1928 { Short4 t0 = CmpGT(Short4(0x0000, 0x0000, 0x0000, 0x0000), src0.x); Short4 t1; t1 = src2.x; t1 &= t0; t0 = ~t0 & src1.x; t0 |= t1; dst.x = t0; };
1929 {Short4 t0 = CmpGT(Short4(0x0000, 0x0000, 0x0000, 0x0000), src0.y); Short4 t1; t1 = src2.y; t1 &= t0; t0 = ~t0 & src1.y; t0 |= t1; dst.y = t0; };
1930 {Short4 t0 = CmpGT(Short4(0x0000, 0x0000, 0x0000, 0x0000), src0.z); Short4 t1; t1 = src2.z; t1 &= t0; t0 = ~t0 & src1.z; t0 |= t1; dst.z = t0; };
1931 {Short4 t0 = CmpGT(Short4(0x0000, 0x0000, 0x0000, 0x0000), src0.w); Short4 t1; t1 = src2.w; t1 &= t0; t0 = ~t0 & src1.w; t0 |= t1; dst.w = t0; };
1932 }
1933
Nicolas Capens4f172c72016-01-13 08:34:30 -05001934 void PixelPipeline::BEM(Vector4s &dst, Vector4s &src0, Vector4s &src1, int stage)
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001935 {
1936 Short4 t0;
1937 Short4 t1;
1938
1939 // dst.x = src0.x + BUMPENVMAT00(stage) * src1.x + BUMPENVMAT10(stage) * src1.y
Nicolas Capens4f172c72016-01-13 08:34:30 -05001940 t0 = MulHigh(src1.x, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4W[0][0]))); t0 = t0 << 4; // FIXME: Matrix components range? Overflow hazard.
1941 t1 = MulHigh(src1.y, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4W[1][0]))); t1 = t1 << 4; // FIXME: Matrix components range? Overflow hazard.
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001942 t0 = AddSat(t0, t1);
1943 t0 = AddSat(t0, src0.x);
1944 dst.x = t0;
1945
1946 // dst.y = src0.y + BUMPENVMAT01(stage) * src1.x + BUMPENVMAT11(stage) * src1.y
Nicolas Capens4f172c72016-01-13 08:34:30 -05001947 t0 = MulHigh(src1.x, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4W[0][1]))); t0 = t0 << 4; // FIXME: Matrix components range? Overflow hazard.
1948 t1 = MulHigh(src1.y, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4W[1][1]))); t1 = t1 << 4; // FIXME: Matrix components range? Overflow hazard.
Alexis Hetuf2a8c372015-07-13 11:08:41 -04001949 t0 = AddSat(t0, t1);
1950 t0 = AddSat(t0, src0.y);
1951 dst.y = t0;
1952 }
Nicolas Capens8833e012016-01-01 23:47:52 -05001953}
1954