blob: eeb2116eb2f9609fbef2b0e55cf6c3c12d341f27 [file] [log] [blame]
// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15#include "PixelRoutine.hpp"
16
17#include "SamplerCore.hpp"
18#include "Constants.hpp"
Nicolas Capens1d8c8db2018-11-05 16:30:42 -050019#include "Device/Renderer.hpp"
20#include "Device/QuadRasterizer.hpp"
Nicolas Capens1d8c8db2018-11-05 16:30:42 -050021#include "Device/Primitive.hpp"
Chris Forbesebe5f7f2019-01-16 10:38:34 -080022#include "Vulkan/VkDebug.hpp"
Chris Forbesc2968062019-03-19 16:48:03 -070023#include "Vulkan/VkPipelineLayout.hpp"
24
Nicolas Capens68a82382018-10-02 13:16:55 -040025namespace sw
26{
Nicolas Capens68a82382018-10-02 13:16:55 -040027 extern bool postBlendSRGB;
28 extern bool exactColorRounding;
29 extern bool forceClearRegisters;
30
Ben Clayton76e9bc02019-02-26 15:02:18 +000031 PixelRoutine::PixelRoutine(
32 const PixelProcessor::State &state,
33 vk::PipelineLayout const *pipelineLayout,
Nicolas Capens09591b82019-04-08 22:51:08 -040034 SpirvShader const *spirvShader,
35 const vk::DescriptorSet::Bindings &descriptorSets)
Ben Clayton76e9bc02019-02-26 15:02:18 +000036 : QuadRasterizer(state, spirvShader),
Nicolas Capens09591b82019-04-08 22:51:08 -040037 routine(pipelineLayout),
38 descriptorSets(descriptorSets)
Nicolas Capens68a82382018-10-02 13:16:55 -040039 {
Chris Forbesc61271e2019-02-19 17:01:28 -080040 spirvShader->emitProlog(&routine);
Chris Forbes5d61a1c2019-02-10 21:03:00 +000041
42 if (forceClearRegisters)
Nicolas Capens68a82382018-10-02 13:16:55 -040043 {
Chris Forbes5d61a1c2019-02-10 21:03:00 +000044 for (int i = 0; i < MAX_INTERFACE_COMPONENTS; i++)
Nicolas Capens68a82382018-10-02 13:16:55 -040045 {
Chris Forbes64be7c72019-02-19 16:40:57 -080046 routine.inputs[i] = Float4(0.0f);
Nicolas Capens68a82382018-10-02 13:16:55 -040047 }
48 }
49 }
50
51 PixelRoutine::~PixelRoutine()
52 {
53 }
54
55 void PixelRoutine::quad(Pointer<Byte> cBuffer[RENDERTARGETS], Pointer<Byte> &zBuffer, Pointer<Byte> &sBuffer, Int cMask[4], Int &x, Int &y)
56 {
57 #if PERF_PROFILE
58 Long pipeTime = Ticks();
59 #endif
60
Chris Forbeseae5b962019-04-19 17:01:10 -070061 // TODO: consider shader which modifies sample mask in general
62 const bool earlyDepthTest = !spirvShader->getModes().DepthReplacing && !state.alphaToCoverage;
Nicolas Capens68a82382018-10-02 13:16:55 -040063
64 Int zMask[4]; // Depth mask
65 Int sMask[4]; // Stencil mask
66
67 for(unsigned int q = 0; q < state.multiSample; q++)
68 {
69 zMask[q] = cMask[q];
70 sMask[q] = cMask[q];
71 }
72
73 for(unsigned int q = 0; q < state.multiSample; q++)
74 {
75 stencilTest(sBuffer, q, x, sMask[q], cMask[q]);
76 }
77
78 Float4 f;
79 Float4 rhwCentroid;
80
81 Float4 xxxx = Float4(Float(x)) + *Pointer<Float4>(primitive + OFFSET(Primitive,xQuad), 16);
82
83 if(interpolateZ())
84 {
85 for(unsigned int q = 0; q < state.multiSample; q++)
86 {
87 Float4 x = xxxx;
88
89 if(state.multiSample > 1)
90 {
91 x -= *Pointer<Float4>(constants + OFFSET(Constants,X) + q * sizeof(float4));
92 }
93
94 z[q] = interpolate(x, Dz[q], z[q], primitive + OFFSET(Primitive,z), false, false, state.depthClamp);
95 }
96 }
97
98 Bool depthPass = false;
99
100 if(earlyDepthTest)
101 {
102 for(unsigned int q = 0; q < state.multiSample; q++)
103 {
104 depthPass = depthPass || depthTest(zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]);
105 }
106 }
107
108 If(depthPass || Bool(!earlyDepthTest))
109 {
110 #if PERF_PROFILE
111 Long interpTime = Ticks();
112 #endif
113
114 Float4 yyyy = Float4(Float(y)) + *Pointer<Float4>(primitive + OFFSET(Primitive,yQuad), 16);
115
116 // Centroid locations
117 Float4 XXXX = Float4(0.0f);
118 Float4 YYYY = Float4(0.0f);
119
120 if(state.centroid)
121 {
122 Float4 WWWW(1.0e-9f);
123
124 for(unsigned int q = 0; q < state.multiSample; q++)
125 {
126 XXXX += *Pointer<Float4>(constants + OFFSET(Constants,sampleX[q]) + 16 * cMask[q]);
127 YYYY += *Pointer<Float4>(constants + OFFSET(Constants,sampleY[q]) + 16 * cMask[q]);
128 WWWW += *Pointer<Float4>(constants + OFFSET(Constants,weight) + 16 * cMask[q]);
129 }
130
131 WWWW = Rcp_pp(WWWW);
132 XXXX *= WWWW;
133 YYYY *= WWWW;
134
135 XXXX += xxxx;
136 YYYY += yyyy;
137 }
138
139 if(interpolateW())
140 {
141 w = interpolate(xxxx, Dw, rhw, primitive + OFFSET(Primitive,w), false, false, false);
142 rhw = reciprocal(w, false, false, true);
143
144 if(state.centroid)
145 {
146 rhwCentroid = reciprocal(interpolateCentroid(XXXX, YYYY, rhwCentroid, primitive + OFFSET(Primitive,w), false, false));
147 }
148 }
149
Chris Forbes5d61a1c2019-02-10 21:03:00 +0000150 for (int interpolant = 0; interpolant < MAX_INTERFACE_COMPONENTS; interpolant++)
Nicolas Capens68a82382018-10-02 13:16:55 -0400151 {
Chris Forbes5d61a1c2019-02-10 21:03:00 +0000152 auto const & input = spirvShader->inputs[interpolant];
153 if (input.Type != SpirvShader::ATTRIBTYPE_UNUSED)
Nicolas Capens68a82382018-10-02 13:16:55 -0400154 {
Chris Forbes451cad22019-04-19 18:31:39 -0700155 if (input.Centroid && state.multiSample > 1)
Nicolas Capens68a82382018-10-02 13:16:55 -0400156 {
Chris Forbes64be7c72019-02-19 16:40:57 -0800157 routine.inputs[interpolant] =
Chris Forbes5d61a1c2019-02-10 21:03:00 +0000158 interpolateCentroid(XXXX, YYYY, rhwCentroid,
159 primitive + OFFSET(Primitive, V[interpolant]),
Chris Forbesa7a37552019-05-12 10:06:34 -0700160 input.Flat, !input.NoPerspective);
Chris Forbes5d61a1c2019-02-10 21:03:00 +0000161 }
162 else
163 {
Chris Forbes64be7c72019-02-19 16:40:57 -0800164 routine.inputs[interpolant] =
Chris Forbes5d61a1c2019-02-10 21:03:00 +0000165 interpolate(xxxx, Dv[interpolant], rhw,
166 primitive + OFFSET(Primitive, V[interpolant]),
Chris Forbesa7a37552019-05-12 10:06:34 -0700167 input.Flat, !input.NoPerspective, false);
Nicolas Capens68a82382018-10-02 13:16:55 -0400168 }
169 }
Nicolas Capens68a82382018-10-02 13:16:55 -0400170 }
171
172 setBuiltins(x, y, z, w);
173
174 #if PERF_PROFILE
175 cycles[PERF_INTERP] += Ticks() - interpTime;
176 #endif
177
178 Bool alphaPass = true;
179
Chris Forbes71a1e012019-04-22 14:18:34 -0700180 #if PERF_PROFILE
181 Long shaderTime = Ticks();
182 #endif
183
184 if (spirvShader)
Nicolas Capens68a82382018-10-02 13:16:55 -0400185 {
Nicolas Capens68a82382018-10-02 13:16:55 -0400186 applyShader(cMask);
Chris Forbes71a1e012019-04-22 14:18:34 -0700187 }
Nicolas Capens68a82382018-10-02 13:16:55 -0400188
Chris Forbes71a1e012019-04-22 14:18:34 -0700189 #if PERF_PROFILE
190 cycles[PERF_SHADER] += Ticks() - shaderTime;
191 #endif
Nicolas Capens68a82382018-10-02 13:16:55 -0400192
Chris Forbes71a1e012019-04-22 14:18:34 -0700193 alphaPass = alphaTest(cMask);
Nicolas Capens68a82382018-10-02 13:16:55 -0400194
Chris Forbes71a1e012019-04-22 14:18:34 -0700195 if((spirvShader && spirvShader->getModes().ContainsKill) || state.alphaToCoverage)
196 {
197 for(unsigned int q = 0; q < state.multiSample; q++)
Nicolas Capens68a82382018-10-02 13:16:55 -0400198 {
Chris Forbes71a1e012019-04-22 14:18:34 -0700199 zMask[q] &= cMask[q];
200 sMask[q] &= cMask[q];
Nicolas Capens68a82382018-10-02 13:16:55 -0400201 }
202 }
203
204 If(alphaPass)
205 {
206 if(!earlyDepthTest)
207 {
208 for(unsigned int q = 0; q < state.multiSample; q++)
209 {
210 depthPass = depthPass || depthTest(zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]);
211 }
212 }
213
214 #if PERF_PROFILE
215 Long ropTime = Ticks();
216 #endif
217
218 If(depthPass || Bool(earlyDepthTest))
219 {
220 for(unsigned int q = 0; q < state.multiSample; q++)
221 {
222 if(state.multiSampleMask & (1 << q))
223 {
224 writeDepth(zBuffer, q, x, z[q], zMask[q]);
225
226 if(state.occlusionEnabled)
227 {
228 occlusion += *Pointer<UInt>(constants + OFFSET(Constants,occlusionCount) + 4 * (zMask[q] & sMask[q]));
229 }
230 }
231 }
232
Chris Forbes71a1e012019-04-22 14:18:34 -0700233 #if PERF_PROFILE
234 AddAtomic(Pointer<Long>(&profiler.ropOperations), 4);
235 #endif
Nicolas Capens68a82382018-10-02 13:16:55 -0400236
Chris Forbes71a1e012019-04-22 14:18:34 -0700237 rasterOperation(cBuffer, x, sMask, zMask, cMask);
Nicolas Capens68a82382018-10-02 13:16:55 -0400238 }
239
240 #if PERF_PROFILE
241 cycles[PERF_ROP] += Ticks() - ropTime;
242 #endif
243 }
244 }
245
246 for(unsigned int q = 0; q < state.multiSample; q++)
247 {
248 if(state.multiSampleMask & (1 << q))
249 {
250 writeStencil(sBuffer, q, x, sMask[q], zMask[q], cMask[q]);
251 }
252 }
253
254 #if PERF_PROFILE
255 cycles[PERF_PIPE] += Ticks() - pipeTime;
256 #endif
257 }
258
259 Float4 PixelRoutine::interpolateCentroid(Float4 &x, Float4 &y, Float4 &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective)
260 {
261 Float4 interpolant = *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,C), 16);
262
263 if(!flat)
264 {
265 interpolant += x * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,A), 16) +
266 y * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,B), 16);
267
268 if(perspective)
269 {
270 interpolant *= rhw;
271 }
272 }
273
274 return interpolant;
275 }
276
277 void PixelRoutine::stencilTest(Pointer<Byte> &sBuffer, int q, Int &x, Int &sMask, Int &cMask)
278 {
279 if(!state.stencilActive)
280 {
281 return;
282 }
283
284 // (StencilRef & StencilMask) CompFunc (StencilBufferValue & StencilMask)
285
286 Pointer<Byte> buffer = sBuffer + 2 * x;
287
288 if(q > 0)
289 {
290 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,stencilSliceB));
291 }
292
293 Byte8 value = *Pointer<Byte8>(buffer);
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700294 Byte8 valueBack = value;
Nicolas Capens68a82382018-10-02 13:16:55 -0400295
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700296 if(state.frontStencil.compareMask != 0xff)
Nicolas Capens68a82382018-10-02 13:16:55 -0400297 {
298 value &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].testMaskQ));
299 }
300
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700301 stencilTest(value, state.frontStencil.compareOp, false);
Nicolas Capens68a82382018-10-02 13:16:55 -0400302
303 if(state.twoSidedStencil)
304 {
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700305 if(state.backStencil.compareMask != 0xff)
Nicolas Capens68a82382018-10-02 13:16:55 -0400306 {
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700307 valueBack &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].testMaskQ));
Nicolas Capens68a82382018-10-02 13:16:55 -0400308 }
309
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700310 stencilTest(valueBack, state.backStencil.compareOp, true);
Nicolas Capens68a82382018-10-02 13:16:55 -0400311
312 value &= *Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask));
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700313 valueBack &= *Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask));
314 value |= valueBack;
Nicolas Capens68a82382018-10-02 13:16:55 -0400315 }
316
317 sMask = SignMask(value) & cMask;
318 }
319
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700320 void PixelRoutine::stencilTest(Byte8 &value, VkCompareOp stencilCompareMode, bool isBack)
Nicolas Capens68a82382018-10-02 13:16:55 -0400321 {
322 Byte8 equal;
323
324 switch(stencilCompareMode)
325 {
Alexis Hetudcb803a2018-11-15 16:25:38 -0500326 case VK_COMPARE_OP_ALWAYS:
Nicolas Capens68a82382018-10-02 13:16:55 -0400327 value = Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
328 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500329 case VK_COMPARE_OP_NEVER:
Nicolas Capens68a82382018-10-02 13:16:55 -0400330 value = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
331 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500332 case VK_COMPARE_OP_LESS: // a < b ~ b > a
Nicolas Capens68a82382018-10-02 13:16:55 -0400333 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700334 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[isBack].referenceMaskedSignedQ)));
Nicolas Capens68a82382018-10-02 13:16:55 -0400335 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500336 case VK_COMPARE_OP_EQUAL:
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700337 value = CmpEQ(value, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[isBack].referenceMaskedQ)));
Nicolas Capens68a82382018-10-02 13:16:55 -0400338 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500339 case VK_COMPARE_OP_NOT_EQUAL: // a != b ~ !(a == b)
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700340 value = CmpEQ(value, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[isBack].referenceMaskedQ)));
Nicolas Capens68a82382018-10-02 13:16:55 -0400341 value ^= Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
342 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500343 case VK_COMPARE_OP_LESS_OR_EQUAL: // a <= b ~ (b > a) || (a == b)
Nicolas Capens68a82382018-10-02 13:16:55 -0400344 equal = value;
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700345 equal = CmpEQ(equal, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[isBack].referenceMaskedQ)));
Nicolas Capens68a82382018-10-02 13:16:55 -0400346 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700347 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[isBack].referenceMaskedSignedQ)));
Nicolas Capens68a82382018-10-02 13:16:55 -0400348 value |= equal;
349 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500350 case VK_COMPARE_OP_GREATER: // a > b
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700351 equal = *Pointer<Byte8>(data + OFFSET(DrawData,stencil[isBack].referenceMaskedSignedQ));
Nicolas Capens68a82382018-10-02 13:16:55 -0400352 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
353 equal = CmpGT(As<SByte8>(equal), As<SByte8>(value));
354 value = equal;
355 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500356 case VK_COMPARE_OP_GREATER_OR_EQUAL: // a >= b ~ !(a < b) ~ !(b > a)
Nicolas Capens68a82382018-10-02 13:16:55 -0400357 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700358 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[isBack].referenceMaskedSignedQ)));
Nicolas Capens68a82382018-10-02 13:16:55 -0400359 value ^= Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
360 break;
361 default:
Ben Clayton3bb94902019-04-07 13:10:54 +0100362 UNIMPLEMENTED("VkCompareOp: %d", int(stencilCompareMode));
Nicolas Capens68a82382018-10-02 13:16:55 -0400363 }
364 }
365
Chris Forbesbea47512019-03-12 14:50:55 -0700366 Bool PixelRoutine::depthTest32F(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &sMask, Int &zMask, Int &cMask)
Nicolas Capens68a82382018-10-02 13:16:55 -0400367 {
Nicolas Capens68a82382018-10-02 13:16:55 -0400368 Float4 Z = z;
369
Chris Forbes1845d5e2018-12-27 11:50:15 -0800370 if(spirvShader && spirvShader->getModes().DepthReplacing)
Nicolas Capens68a82382018-10-02 13:16:55 -0400371 {
Chris Forbesb4de34e2019-03-12 13:01:45 -0700372 Z = oDepth;
Nicolas Capens68a82382018-10-02 13:16:55 -0400373 }
374
375 Pointer<Byte> buffer;
376 Int pitch;
377
378 if(!state.quadLayoutDepthBuffer)
379 {
380 buffer = zBuffer + 4 * x;
381 pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB));
382 }
383 else
384 {
385 buffer = zBuffer + 8 * x;
386 }
387
388 if(q > 0)
389 {
390 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,depthSliceB));
391 }
392
393 Float4 zValue;
394
Alexis Hetudcb803a2018-11-15 16:25:38 -0500395 if(state.depthCompareMode != VK_COMPARE_OP_NEVER || (state.depthCompareMode != VK_COMPARE_OP_ALWAYS && !state.depthWriteEnable))
Nicolas Capens68a82382018-10-02 13:16:55 -0400396 {
397 if(!state.quadLayoutDepthBuffer)
398 {
399 // FIXME: Properly optimizes?
400 zValue.xy = *Pointer<Float4>(buffer);
401 zValue.zw = *Pointer<Float4>(buffer + pitch - 8);
402 }
403 else
404 {
405 zValue = *Pointer<Float4>(buffer, 16);
406 }
407 }
408
409 Int4 zTest;
410
411 switch(state.depthCompareMode)
412 {
Alexis Hetudcb803a2018-11-15 16:25:38 -0500413 case VK_COMPARE_OP_ALWAYS:
Nicolas Capens68a82382018-10-02 13:16:55 -0400414 // Optimized
415 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500416 case VK_COMPARE_OP_NEVER:
Nicolas Capens68a82382018-10-02 13:16:55 -0400417 // Optimized
418 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500419 case VK_COMPARE_OP_EQUAL:
Nicolas Capens68a82382018-10-02 13:16:55 -0400420 zTest = CmpEQ(zValue, Z);
421 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500422 case VK_COMPARE_OP_NOT_EQUAL:
Nicolas Capens68a82382018-10-02 13:16:55 -0400423 zTest = CmpNEQ(zValue, Z);
424 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500425 case VK_COMPARE_OP_LESS:
Chris Forbesb4de34e2019-03-12 13:01:45 -0700426 zTest = CmpNLE(zValue, Z);
Nicolas Capens68a82382018-10-02 13:16:55 -0400427 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500428 case VK_COMPARE_OP_GREATER_OR_EQUAL:
Chris Forbesb4de34e2019-03-12 13:01:45 -0700429 zTest = CmpLE(zValue, Z);
Nicolas Capens68a82382018-10-02 13:16:55 -0400430 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500431 case VK_COMPARE_OP_LESS_OR_EQUAL:
Chris Forbesb4de34e2019-03-12 13:01:45 -0700432 zTest = CmpNLT(zValue, Z);
Nicolas Capens68a82382018-10-02 13:16:55 -0400433 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500434 case VK_COMPARE_OP_GREATER:
Chris Forbesb4de34e2019-03-12 13:01:45 -0700435 zTest = CmpLT(zValue, Z);
Nicolas Capens68a82382018-10-02 13:16:55 -0400436 break;
437 default:
Ben Clayton3bb94902019-04-07 13:10:54 +0100438 UNIMPLEMENTED("VkCompareOp: %d", int(state.depthCompareMode));
Nicolas Capens68a82382018-10-02 13:16:55 -0400439 }
440
441 switch(state.depthCompareMode)
442 {
Alexis Hetudcb803a2018-11-15 16:25:38 -0500443 case VK_COMPARE_OP_ALWAYS:
Nicolas Capens68a82382018-10-02 13:16:55 -0400444 zMask = cMask;
445 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500446 case VK_COMPARE_OP_NEVER:
Nicolas Capens68a82382018-10-02 13:16:55 -0400447 zMask = 0x0;
448 break;
449 default:
450 zMask = SignMask(zTest) & cMask;
451 break;
452 }
453
454 if(state.stencilActive)
455 {
456 zMask &= sMask;
457 }
458
459 return zMask != 0;
460 }
461
Chris Forbesbea47512019-03-12 14:50:55 -0700462 Bool PixelRoutine::depthTest16(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &sMask, Int &zMask, Int &cMask)
463 {
464 Short4 Z = convertFixed16(z, true);
465
466 if(spirvShader && spirvShader->getModes().DepthReplacing)
467 {
468 Z = convertFixed16(oDepth, true);
469 }
470
471 Pointer<Byte> buffer;
472 Int pitch;
473
474 if(!state.quadLayoutDepthBuffer)
475 {
476 buffer = zBuffer + 2 * x;
477 pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB));
478 }
479 else
480 {
481 buffer = zBuffer + 4 * x;
482 }
483
484 if(q > 0)
485 {
486 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,depthSliceB));
487 }
488
489 Short4 zValue;
490
491 if(state.depthCompareMode != VK_COMPARE_OP_NEVER || (state.depthCompareMode != VK_COMPARE_OP_ALWAYS && !state.depthWriteEnable))
492 {
493 if(!state.quadLayoutDepthBuffer)
494 {
495 // FIXME: Properly optimizes?
496 zValue = *Pointer<Short4>(buffer) & Short4(-1, -1, 0, 0);
497 zValue = zValue | (*Pointer<Short4>(buffer + pitch - 4) & Short4(0, 0, -1, -1));
498 }
499 else
500 {
501 zValue = *Pointer<Short4>(buffer, 8);
502 }
503 }
504
505 Int4 zTest;
506
507 // Bias values to make unsigned compares out of Reactor's (due SSE's) signed compares only
Alexis Hetu5078d482019-04-10 15:00:25 -0400508 zValue = zValue - Short4(0x8000u);
509 Z = Z - Short4(0x8000u);
Chris Forbesbea47512019-03-12 14:50:55 -0700510
511 switch(state.depthCompareMode)
512 {
513 case VK_COMPARE_OP_ALWAYS:
514 // Optimized
515 break;
516 case VK_COMPARE_OP_NEVER:
517 // Optimized
518 break;
519 case VK_COMPARE_OP_EQUAL:
520 zTest = Int4(CmpEQ(zValue, Z));
521 break;
522 case VK_COMPARE_OP_NOT_EQUAL:
523 zTest = ~Int4(CmpEQ(zValue, Z));
524 break;
525 case VK_COMPARE_OP_LESS:
526 zTest = Int4(CmpGT(zValue, Z));
527 break;
528 case VK_COMPARE_OP_GREATER_OR_EQUAL:
529 zTest = ~Int4(CmpGT(zValue, Z));
530 break;
531 case VK_COMPARE_OP_LESS_OR_EQUAL:
532 zTest = ~Int4(CmpGT(Z, zValue));
533 break;
534 case VK_COMPARE_OP_GREATER:
535 zTest = Int4(CmpGT(Z, zValue));
536 break;
537 default:
Ben Clayton3bb94902019-04-07 13:10:54 +0100538 UNIMPLEMENTED("VkCompareOp: %d", int(state.depthCompareMode));
Chris Forbesbea47512019-03-12 14:50:55 -0700539 }
540
541 switch(state.depthCompareMode)
542 {
543 case VK_COMPARE_OP_ALWAYS:
544 zMask = cMask;
545 break;
546 case VK_COMPARE_OP_NEVER:
547 zMask = 0x0;
548 break;
549 default:
550 zMask = SignMask(zTest) & cMask;
551 break;
552 }
553
554 if(state.stencilActive)
555 {
556 zMask &= sMask;
557 }
558
559 return zMask != 0;
560 }
561
562 Bool PixelRoutine::depthTest(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &sMask, Int &zMask, Int &cMask)
563 {
564 if(!state.depthTestActive)
565 {
566 return true;
567 }
568
569 if (state.depthFormat == VK_FORMAT_D16_UNORM)
570 return depthTest16(zBuffer, q, x, z, sMask, zMask, cMask);
571 else
572 return depthTest32F(zBuffer, q, x, z, sMask, zMask, cMask);
573 }
574
Nicolas Capens68a82382018-10-02 13:16:55 -0400575 void PixelRoutine::alphaToCoverage(Int cMask[4], Float4 &alpha)
576 {
577 Int4 coverage0 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c0)));
578 Int4 coverage1 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c1)));
579 Int4 coverage2 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c2)));
580 Int4 coverage3 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c3)));
581
582 Int aMask0 = SignMask(coverage0);
583 Int aMask1 = SignMask(coverage1);
584 Int aMask2 = SignMask(coverage2);
585 Int aMask3 = SignMask(coverage3);
586
587 cMask[0] &= aMask0;
588 cMask[1] &= aMask1;
589 cMask[2] &= aMask2;
590 cMask[3] &= aMask3;
591 }
592
Chris Forbesbea47512019-03-12 14:50:55 -0700593 void PixelRoutine::writeDepth32F(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &zMask)
Nicolas Capens68a82382018-10-02 13:16:55 -0400594 {
Nicolas Capens68a82382018-10-02 13:16:55 -0400595 Float4 Z = z;
596
Chris Forbes1845d5e2018-12-27 11:50:15 -0800597 if(spirvShader && spirvShader->getModes().DepthReplacing)
Nicolas Capens68a82382018-10-02 13:16:55 -0400598 {
Chris Forbesb4de34e2019-03-12 13:01:45 -0700599 Z = oDepth;
Nicolas Capens68a82382018-10-02 13:16:55 -0400600 }
601
602 Pointer<Byte> buffer;
603 Int pitch;
604
605 if(!state.quadLayoutDepthBuffer)
606 {
607 buffer = zBuffer + 4 * x;
608 pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB));
609 }
610 else
611 {
612 buffer = zBuffer + 8 * x;
613 }
614
615 if(q > 0)
616 {
617 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,depthSliceB));
618 }
619
620 Float4 zValue;
621
Alexis Hetudcb803a2018-11-15 16:25:38 -0500622 if(state.depthCompareMode != VK_COMPARE_OP_NEVER || (state.depthCompareMode != VK_COMPARE_OP_ALWAYS && !state.depthWriteEnable))
Nicolas Capens68a82382018-10-02 13:16:55 -0400623 {
624 if(!state.quadLayoutDepthBuffer)
625 {
626 // FIXME: Properly optimizes?
627 zValue.xy = *Pointer<Float4>(buffer);
628 zValue.zw = *Pointer<Float4>(buffer + pitch - 8);
629 }
630 else
631 {
632 zValue = *Pointer<Float4>(buffer, 16);
633 }
634 }
635
636 Z = As<Float4>(As<Int4>(Z) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X) + zMask * 16, 16));
637 zValue = As<Float4>(As<Int4>(zValue) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X) + zMask * 16, 16));
638 Z = As<Float4>(As<Int4>(Z) | As<Int4>(zValue));
639
640 if(!state.quadLayoutDepthBuffer)
641 {
642 // FIXME: Properly optimizes?
643 *Pointer<Float2>(buffer) = Float2(Z.xy);
644 *Pointer<Float2>(buffer + pitch) = Float2(Z.zw);
645 }
646 else
647 {
648 *Pointer<Float4>(buffer, 16) = Z;
649 }
650 }
651
Chris Forbesbea47512019-03-12 14:50:55 -0700652 void PixelRoutine::writeDepth16(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &zMask)
653 {
654 Short4 Z = As<Short4>(convertFixed16(z, true));
655
656 if(spirvShader && spirvShader->getModes().DepthReplacing)
657 {
658 Z = As<Short4>(convertFixed16(oDepth, true));
659 }
660
661 Pointer<Byte> buffer;
662 Int pitch;
663
664 if(!state.quadLayoutDepthBuffer)
665 {
666 buffer = zBuffer + 2 * x;
667 pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB));
668 }
669 else
670 {
671 buffer = zBuffer + 4 * x;
672 }
673
674 if(q > 0)
675 {
676 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,depthSliceB));
677 }
678
679 Short4 zValue;
680
681 if(state.depthCompareMode != VK_COMPARE_OP_NEVER || (state.depthCompareMode != VK_COMPARE_OP_ALWAYS && !state.depthWriteEnable))
682 {
683 if(!state.quadLayoutDepthBuffer)
684 {
685 // FIXME: Properly optimizes?
686 zValue = *Pointer<Short4>(buffer) & Short4(-1, -1, 0, 0);
687 zValue = zValue | (*Pointer<Short4>(buffer + pitch - 4) & Short4(0, 0, -1, -1));
688 }
689 else
690 {
691 zValue = *Pointer<Short4>(buffer, 8);
692 }
693 }
694
695 Z = Z & *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q) + zMask * 8, 8);
696 zValue = zValue & *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q) + zMask * 8, 8);
697 Z = Z | zValue;
698
699 if(!state.quadLayoutDepthBuffer)
700 {
701 // FIXME: Properly optimizes?
702 *Pointer<Short>(buffer) = Extract(Z, 0);
703 *Pointer<Short>(buffer+2) = Extract(Z, 1);
704 *Pointer<Short>(buffer+pitch) = Extract(Z, 2);
705 *Pointer<Short>(buffer+pitch+2) = Extract(Z, 3);
706 }
707 else
708 {
709 *Pointer<Short4>(buffer, 8) = Z;
710 }
711 }
712
713 void PixelRoutine::writeDepth(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &zMask)
714 {
715 if(!state.depthWriteEnable)
716 {
717 return;
718 }
719
720 if (state.depthFormat == VK_FORMAT_D16_UNORM)
721 writeDepth16(zBuffer, q, x, z, zMask);
722 else
723 writeDepth32F(zBuffer, q, x, z, zMask);
724 }
725
Nicolas Capens68a82382018-10-02 13:16:55 -0400726 void PixelRoutine::writeStencil(Pointer<Byte> &sBuffer, int q, Int &x, Int &sMask, Int &zMask, Int &cMask)
727 {
728 if(!state.stencilActive)
729 {
730 return;
731 }
732
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700733 if(state.frontStencil.passOp == VK_STENCIL_OP_KEEP && state.frontStencil.depthFailOp == VK_STENCIL_OP_KEEP && state.frontStencil.failOp == VK_STENCIL_OP_KEEP)
Nicolas Capens68a82382018-10-02 13:16:55 -0400734 {
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700735 if(!state.twoSidedStencil || (state.backStencil.passOp == VK_STENCIL_OP_KEEP && state.backStencil.depthFailOp == VK_STENCIL_OP_KEEP && state.backStencil.failOp == VK_STENCIL_OP_KEEP))
Nicolas Capens68a82382018-10-02 13:16:55 -0400736 {
737 return;
738 }
739 }
740
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700741 if((state.frontStencil.writeMask == 0) && (!state.twoSidedStencil || (state.backStencil.writeMask == 0)))
Nicolas Capens68a82382018-10-02 13:16:55 -0400742 {
743 return;
744 }
745
746 Pointer<Byte> buffer = sBuffer + 2 * x;
747
748 if(q > 0)
749 {
750 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,stencilSliceB));
751 }
752
753 Byte8 bufferValue = *Pointer<Byte8>(buffer);
754
755 Byte8 newValue;
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700756 stencilOperation(newValue, bufferValue, state.frontStencil, false, zMask, sMask);
Nicolas Capens68a82382018-10-02 13:16:55 -0400757
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700758 if(state.frontStencil.writeMask != 0)
Nicolas Capens68a82382018-10-02 13:16:55 -0400759 {
760 Byte8 maskedValue = bufferValue;
761 newValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].writeMaskQ));
762 maskedValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].invWriteMaskQ));
763 newValue |= maskedValue;
764 }
765
766 if(state.twoSidedStencil)
767 {
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700768 Byte8 newValueBack;
Nicolas Capens68a82382018-10-02 13:16:55 -0400769
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700770 stencilOperation(newValueBack, bufferValue, state.backStencil, true, zMask, sMask);
Nicolas Capens68a82382018-10-02 13:16:55 -0400771
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700772 if(state.backStencil.writeMask != 0)
Nicolas Capens68a82382018-10-02 13:16:55 -0400773 {
774 Byte8 maskedValue = bufferValue;
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700775 newValueBack &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].writeMaskQ));
Nicolas Capens68a82382018-10-02 13:16:55 -0400776 maskedValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].invWriteMaskQ));
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700777 newValueBack |= maskedValue;
Nicolas Capens68a82382018-10-02 13:16:55 -0400778 }
779
780 newValue &= *Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask));
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700781 newValueBack &= *Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask));
782 newValue |= newValueBack;
Nicolas Capens68a82382018-10-02 13:16:55 -0400783 }
784
785 newValue &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * cMask);
786 bufferValue &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * cMask);
787 newValue |= bufferValue;
788
789 *Pointer<Byte4>(buffer) = Byte4(newValue);
790 }
791
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700792 void PixelRoutine::stencilOperation(Byte8 &newValue, Byte8 &bufferValue, VkStencilOpState const &ops, bool isBack, Int &zMask, Int &sMask)
Nicolas Capens68a82382018-10-02 13:16:55 -0400793 {
794 Byte8 &pass = newValue;
795 Byte8 fail;
796 Byte8 zFail;
797
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700798 stencilOperation(pass, bufferValue, ops.passOp, isBack);
Nicolas Capens68a82382018-10-02 13:16:55 -0400799
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700800 if(ops.depthFailOp != ops.passOp)
Nicolas Capens68a82382018-10-02 13:16:55 -0400801 {
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700802 stencilOperation(zFail, bufferValue, ops.depthFailOp, isBack);
Nicolas Capens68a82382018-10-02 13:16:55 -0400803 }
804
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700805 if(ops.failOp != ops.passOp || ops.failOp != ops.depthFailOp)
Nicolas Capens68a82382018-10-02 13:16:55 -0400806 {
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700807 stencilOperation(fail, bufferValue, ops.failOp, isBack);
Nicolas Capens68a82382018-10-02 13:16:55 -0400808 }
809
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700810 if(ops.failOp != ops.passOp || ops.failOp != ops.depthFailOp)
Nicolas Capens68a82382018-10-02 13:16:55 -0400811 {
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700812 if(state.depthTestActive && ops.depthFailOp != ops.passOp) // zMask valid and values not the same
Nicolas Capens68a82382018-10-02 13:16:55 -0400813 {
814 pass &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * zMask);
815 zFail &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * zMask);
816 pass |= zFail;
817 }
818
819 pass &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * sMask);
820 fail &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * sMask);
821 pass |= fail;
822 }
823 }
824
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700825 void PixelRoutine::stencilOperation(Byte8 &output, Byte8 &bufferValue, VkStencilOp operation, bool isBack)
Nicolas Capens68a82382018-10-02 13:16:55 -0400826 {
827 switch(operation)
828 {
Alexis Hetudcb803a2018-11-15 16:25:38 -0500829 case VK_STENCIL_OP_KEEP:
Nicolas Capens68a82382018-10-02 13:16:55 -0400830 output = bufferValue;
831 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500832 case VK_STENCIL_OP_ZERO:
Nicolas Capens68a82382018-10-02 13:16:55 -0400833 output = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
834 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500835 case VK_STENCIL_OP_REPLACE:
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700836 output = *Pointer<Byte8>(data + OFFSET(DrawData,stencil[isBack].referenceQ));
Nicolas Capens68a82382018-10-02 13:16:55 -0400837 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500838 case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
Nicolas Capens68a82382018-10-02 13:16:55 -0400839 output = AddSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1));
840 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500841 case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
Nicolas Capens68a82382018-10-02 13:16:55 -0400842 output = SubSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1));
843 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500844 case VK_STENCIL_OP_INVERT:
Nicolas Capens68a82382018-10-02 13:16:55 -0400845 output = bufferValue ^ Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
846 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500847 case VK_STENCIL_OP_INCREMENT_AND_WRAP:
Nicolas Capens68a82382018-10-02 13:16:55 -0400848 output = bufferValue + Byte8(1, 1, 1, 1, 1, 1, 1, 1);
849 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500850 case VK_STENCIL_OP_DECREMENT_AND_WRAP:
Nicolas Capens68a82382018-10-02 13:16:55 -0400851 output = bufferValue - Byte8(1, 1, 1, 1, 1, 1, 1, 1);
852 break;
853 default:
Ben Clayton3bb94902019-04-07 13:10:54 +0100854 UNIMPLEMENTED("VkStencilOp: %d", int(operation));
Nicolas Capens68a82382018-10-02 13:16:55 -0400855 }
856 }
857
Alexis Hetu4ad23222018-11-22 16:40:52 -0500858 void PixelRoutine::blendFactor(Vector4s &blendFactor, const Vector4s &current, const Vector4s &pixel, VkBlendFactor blendFactorActive)
Nicolas Capens68a82382018-10-02 13:16:55 -0400859 {
860 switch(blendFactorActive)
861 {
Alexis Hetu4ad23222018-11-22 16:40:52 -0500862 case VK_BLEND_FACTOR_ZERO:
Nicolas Capens68a82382018-10-02 13:16:55 -0400863 // Optimized
864 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500865 case VK_BLEND_FACTOR_ONE:
Nicolas Capens68a82382018-10-02 13:16:55 -0400866 // Optimized
867 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500868 case VK_BLEND_FACTOR_SRC_COLOR:
Nicolas Capens68a82382018-10-02 13:16:55 -0400869 blendFactor.x = current.x;
870 blendFactor.y = current.y;
871 blendFactor.z = current.z;
872 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500873 case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
Nicolas Capens68a82382018-10-02 13:16:55 -0400874 blendFactor.x = Short4(0xFFFFu) - current.x;
875 blendFactor.y = Short4(0xFFFFu) - current.y;
876 blendFactor.z = Short4(0xFFFFu) - current.z;
877 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500878 case VK_BLEND_FACTOR_DST_COLOR:
Nicolas Capens68a82382018-10-02 13:16:55 -0400879 blendFactor.x = pixel.x;
880 blendFactor.y = pixel.y;
881 blendFactor.z = pixel.z;
882 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500883 case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
Nicolas Capens68a82382018-10-02 13:16:55 -0400884 blendFactor.x = Short4(0xFFFFu) - pixel.x;
885 blendFactor.y = Short4(0xFFFFu) - pixel.y;
886 blendFactor.z = Short4(0xFFFFu) - pixel.z;
887 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500888 case VK_BLEND_FACTOR_SRC_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -0400889 blendFactor.x = current.w;
890 blendFactor.y = current.w;
891 blendFactor.z = current.w;
892 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500893 case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -0400894 blendFactor.x = Short4(0xFFFFu) - current.w;
895 blendFactor.y = Short4(0xFFFFu) - current.w;
896 blendFactor.z = Short4(0xFFFFu) - current.w;
897 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500898 case VK_BLEND_FACTOR_DST_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -0400899 blendFactor.x = pixel.w;
900 blendFactor.y = pixel.w;
901 blendFactor.z = pixel.w;
902 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500903 case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -0400904 blendFactor.x = Short4(0xFFFFu) - pixel.w;
905 blendFactor.y = Short4(0xFFFFu) - pixel.w;
906 blendFactor.z = Short4(0xFFFFu) - pixel.w;
907 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500908 case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
Nicolas Capens68a82382018-10-02 13:16:55 -0400909 blendFactor.x = Short4(0xFFFFu) - pixel.w;
910 blendFactor.x = Min(As<UShort4>(blendFactor.x), As<UShort4>(current.w));
911 blendFactor.y = blendFactor.x;
912 blendFactor.z = blendFactor.x;
913 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500914 case VK_BLEND_FACTOR_CONSTANT_COLOR:
Nicolas Capens68a82382018-10-02 13:16:55 -0400915 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[0]));
916 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[1]));
917 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[2]));
918 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500919 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
Nicolas Capens68a82382018-10-02 13:16:55 -0400920 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[0]));
921 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[1]));
922 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[2]));
923 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500924 case VK_BLEND_FACTOR_CONSTANT_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -0400925 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3]));
926 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3]));
927 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3]));
928 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500929 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -0400930 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
931 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
932 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
933 break;
934 default:
Ben Clayton3bb94902019-04-07 13:10:54 +0100935 UNIMPLEMENTED("VkBlendFactor: %d", int(blendFactorActive));
Nicolas Capens68a82382018-10-02 13:16:55 -0400936 }
937 }
938
Alexis Hetu4ad23222018-11-22 16:40:52 -0500939 void PixelRoutine::blendFactorAlpha(Vector4s &blendFactor, const Vector4s &current, const Vector4s &pixel, VkBlendFactor blendFactorAlphaActive)
Nicolas Capens68a82382018-10-02 13:16:55 -0400940 {
941 switch(blendFactorAlphaActive)
942 {
Alexis Hetu4ad23222018-11-22 16:40:52 -0500943 case VK_BLEND_FACTOR_ZERO:
Nicolas Capens68a82382018-10-02 13:16:55 -0400944 // Optimized
945 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500946 case VK_BLEND_FACTOR_ONE:
Nicolas Capens68a82382018-10-02 13:16:55 -0400947 // Optimized
948 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500949 case VK_BLEND_FACTOR_SRC_COLOR:
Nicolas Capens68a82382018-10-02 13:16:55 -0400950 blendFactor.w = current.w;
951 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500952 case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
Nicolas Capens68a82382018-10-02 13:16:55 -0400953 blendFactor.w = Short4(0xFFFFu) - current.w;
954 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500955 case VK_BLEND_FACTOR_DST_COLOR:
Nicolas Capens68a82382018-10-02 13:16:55 -0400956 blendFactor.w = pixel.w;
957 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500958 case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
Nicolas Capens68a82382018-10-02 13:16:55 -0400959 blendFactor.w = Short4(0xFFFFu) - pixel.w;
960 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500961 case VK_BLEND_FACTOR_SRC_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -0400962 blendFactor.w = current.w;
963 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500964 case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -0400965 blendFactor.w = Short4(0xFFFFu) - current.w;
966 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500967 case VK_BLEND_FACTOR_DST_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -0400968 blendFactor.w = pixel.w;
969 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500970 case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -0400971 blendFactor.w = Short4(0xFFFFu) - pixel.w;
972 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500973 case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
Nicolas Capens68a82382018-10-02 13:16:55 -0400974 blendFactor.w = Short4(0xFFFFu);
975 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500976 case VK_BLEND_FACTOR_CONSTANT_COLOR:
977 case VK_BLEND_FACTOR_CONSTANT_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -0400978 blendFactor.w = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3]));
979 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500980 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
981 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -0400982 blendFactor.w = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
983 break;
984 default:
Ben Clayton3bb94902019-04-07 13:10:54 +0100985 UNIMPLEMENTED("VkBlendFactor: %d", int(blendFactorAlphaActive));
Nicolas Capens68a82382018-10-02 13:16:55 -0400986 }
987 }
988
989 bool PixelRoutine::isSRGB(int index) const
990 {
Alexis Hetu25ec7b02019-03-12 14:19:22 -0400991 return vk::Format(state.targetFormat[index]).isSRGBformat();
Nicolas Capens68a82382018-10-02 13:16:55 -0400992 }
993
994 void PixelRoutine::readPixel(int index, Pointer<Byte> &cBuffer, Int &x, Vector4s &pixel)
995 {
996 Short4 c01;
997 Short4 c23;
998 Pointer<Byte> buffer;
999 Pointer<Byte> buffer2;
1000
1001 switch(state.targetFormat[index])
1002 {
Alexis Hetudd152e12018-11-14 13:39:28 -05001003 case VK_FORMAT_R5G6B5_UNORM_PACK16:
Nicolas Capens68a82382018-10-02 13:16:55 -04001004 buffer = cBuffer + 2 * x;
1005 buffer2 = buffer + *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
1006 c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2)));
1007
1008 pixel.x = c01 & Short4(0xF800u);
1009 pixel.y = (c01 & Short4(0x07E0u)) << 5;
1010 pixel.z = (c01 & Short4(0x001Fu)) << 11;
1011 pixel.w = Short4(0xFFFFu);
1012 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001013 case VK_FORMAT_B8G8R8A8_UNORM:
Chris Forbes6407c1a2019-04-15 17:22:57 -07001014 case VK_FORMAT_B8G8R8A8_SRGB:
Nicolas Capens68a82382018-10-02 13:16:55 -04001015 buffer = cBuffer + 4 * x;
1016 c01 = *Pointer<Short4>(buffer);
1017 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
1018 c23 = *Pointer<Short4>(buffer);
1019 pixel.z = c01;
1020 pixel.y = c01;
1021 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
1022 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
1023 pixel.x = pixel.z;
1024 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
1025 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
1026 pixel.y = pixel.z;
1027 pixel.w = pixel.x;
1028 pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x));
1029 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
1030 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
1031 pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
1032 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001033 case VK_FORMAT_R8G8B8A8_UNORM:
1034 case VK_FORMAT_R8G8B8A8_SRGB:
Nicolas Capens68a82382018-10-02 13:16:55 -04001035 buffer = cBuffer + 4 * x;
1036 c01 = *Pointer<Short4>(buffer);
1037 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
1038 c23 = *Pointer<Short4>(buffer);
1039 pixel.z = c01;
1040 pixel.y = c01;
1041 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
1042 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
1043 pixel.x = pixel.z;
1044 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
1045 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
1046 pixel.y = pixel.z;
1047 pixel.w = pixel.x;
1048 pixel.x = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
1049 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
1050 pixel.z = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
1051 pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
1052 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001053 case VK_FORMAT_R8_UNORM:
Nicolas Capens68a82382018-10-02 13:16:55 -04001054 buffer = cBuffer + 1 * x;
1055 pixel.x = Insert(pixel.x, *Pointer<Short>(buffer), 0);
1056 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
1057 pixel.x = Insert(pixel.x, *Pointer<Short>(buffer), 1);
1058 pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x));
1059 pixel.y = Short4(0x0000);
1060 pixel.z = Short4(0x0000);
1061 pixel.w = Short4(0xFFFFu);
1062 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001063 case VK_FORMAT_R8G8_UNORM:
Nicolas Capens68a82382018-10-02 13:16:55 -04001064 buffer = cBuffer + 2 * x;
1065 c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 0));
1066 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
1067 c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 1));
1068 pixel.x = (c01 & Short4(0x00FFu)) | (c01 << 8);
1069 pixel.y = (c01 & Short4(0xFF00u)) | As<Short4>(As<UShort4>(c01) >> 8);
1070 pixel.z = Short4(0x0000u);
1071 pixel.w = Short4(0xFFFFu);
1072 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001073 case VK_FORMAT_R16G16B16A16_UNORM:
Nicolas Capens68a82382018-10-02 13:16:55 -04001074 buffer = cBuffer;
1075 pixel.x = *Pointer<Short4>(buffer + 8 * x);
1076 pixel.y = *Pointer<Short4>(buffer + 8 * x + 8);
1077 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
1078 pixel.z = *Pointer<Short4>(buffer + 8 * x);
1079 pixel.w = *Pointer<Short4>(buffer + 8 * x + 8);
1080 transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w);
1081 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001082 case VK_FORMAT_R16G16_UNORM:
Nicolas Capens68a82382018-10-02 13:16:55 -04001083 buffer = cBuffer;
1084 pixel.x = *Pointer<Short4>(buffer + 4 * x);
1085 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
1086 pixel.y = *Pointer<Short4>(buffer + 4 * x);
1087 pixel.z = pixel.x;
1088 pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.y));
1089 pixel.z = As<Short4>(UnpackHigh(pixel.z, pixel.y));
1090 pixel.y = pixel.z;
1091 pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.z));
1092 pixel.y = As<Short4>(UnpackHigh(pixel.y, pixel.z));
1093 pixel.z = Short4(0xFFFFu);
1094 pixel.w = Short4(0xFFFFu);
1095 break;
1096 default:
Ben Clayton3bb94902019-04-07 13:10:54 +01001097 UNIMPLEMENTED("VkFormat %d", state.targetFormat[index]);
Nicolas Capens68a82382018-10-02 13:16:55 -04001098 }
1099
Chris Forbes37f2bd82019-04-19 17:24:36 -07001100 if(postBlendSRGB || isSRGB(index))
Nicolas Capens68a82382018-10-02 13:16:55 -04001101 {
1102 sRGBtoLinear16_12_16(pixel);
1103 }
1104 }
1105
1106 void PixelRoutine::alphaBlend(int index, Pointer<Byte> &cBuffer, Vector4s &current, Int &x)
1107 {
1108 if(!state.alphaBlendActive)
1109 {
1110 return;
1111 }
1112
1113 Vector4s pixel;
1114 readPixel(index, cBuffer, x, pixel);
1115
1116 // Final Color = ObjectColor * SourceBlendFactor + PixelColor * DestinationBlendFactor
1117 Vector4s sourceFactor;
1118 Vector4s destFactor;
1119
1120 blendFactor(sourceFactor, current, pixel, state.sourceBlendFactor);
1121 blendFactor(destFactor, current, pixel, state.destBlendFactor);
1122
Alexis Hetu4ad23222018-11-22 16:40:52 -05001123 if(state.sourceBlendFactor != VK_BLEND_FACTOR_ONE && state.sourceBlendFactor != VK_BLEND_FACTOR_ZERO)
Nicolas Capens68a82382018-10-02 13:16:55 -04001124 {
1125 current.x = MulHigh(As<UShort4>(current.x), As<UShort4>(sourceFactor.x));
1126 current.y = MulHigh(As<UShort4>(current.y), As<UShort4>(sourceFactor.y));
1127 current.z = MulHigh(As<UShort4>(current.z), As<UShort4>(sourceFactor.z));
1128 }
1129
Alexis Hetu4ad23222018-11-22 16:40:52 -05001130 if(state.destBlendFactor != VK_BLEND_FACTOR_ONE && state.destBlendFactor != VK_BLEND_FACTOR_ZERO)
Nicolas Capens68a82382018-10-02 13:16:55 -04001131 {
1132 pixel.x = MulHigh(As<UShort4>(pixel.x), As<UShort4>(destFactor.x));
1133 pixel.y = MulHigh(As<UShort4>(pixel.y), As<UShort4>(destFactor.y));
1134 pixel.z = MulHigh(As<UShort4>(pixel.z), As<UShort4>(destFactor.z));
1135 }
1136
1137 switch(state.blendOperation)
1138 {
Alexis Hetu4ad23222018-11-22 16:40:52 -05001139 case VK_BLEND_OP_ADD:
Nicolas Capens68a82382018-10-02 13:16:55 -04001140 current.x = AddSat(As<UShort4>(current.x), As<UShort4>(pixel.x));
1141 current.y = AddSat(As<UShort4>(current.y), As<UShort4>(pixel.y));
1142 current.z = AddSat(As<UShort4>(current.z), As<UShort4>(pixel.z));
1143 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001144 case VK_BLEND_OP_SUBTRACT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001145 current.x = SubSat(As<UShort4>(current.x), As<UShort4>(pixel.x));
1146 current.y = SubSat(As<UShort4>(current.y), As<UShort4>(pixel.y));
1147 current.z = SubSat(As<UShort4>(current.z), As<UShort4>(pixel.z));
1148 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001149 case VK_BLEND_OP_REVERSE_SUBTRACT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001150 current.x = SubSat(As<UShort4>(pixel.x), As<UShort4>(current.x));
1151 current.y = SubSat(As<UShort4>(pixel.y), As<UShort4>(current.y));
1152 current.z = SubSat(As<UShort4>(pixel.z), As<UShort4>(current.z));
1153 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001154 case VK_BLEND_OP_MIN:
Nicolas Capens68a82382018-10-02 13:16:55 -04001155 current.x = Min(As<UShort4>(current.x), As<UShort4>(pixel.x));
1156 current.y = Min(As<UShort4>(current.y), As<UShort4>(pixel.y));
1157 current.z = Min(As<UShort4>(current.z), As<UShort4>(pixel.z));
1158 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001159 case VK_BLEND_OP_MAX:
Nicolas Capens68a82382018-10-02 13:16:55 -04001160 current.x = Max(As<UShort4>(current.x), As<UShort4>(pixel.x));
1161 current.y = Max(As<UShort4>(current.y), As<UShort4>(pixel.y));
1162 current.z = Max(As<UShort4>(current.z), As<UShort4>(pixel.z));
1163 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001164 case VK_BLEND_OP_SRC_EXT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001165 // No operation
1166 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001167 case VK_BLEND_OP_DST_EXT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001168 current.x = pixel.x;
1169 current.y = pixel.y;
1170 current.z = pixel.z;
1171 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001172 case VK_BLEND_OP_ZERO_EXT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001173 current.x = Short4(0x0000);
1174 current.y = Short4(0x0000);
1175 current.z = Short4(0x0000);
1176 break;
1177 default:
Ben Clayton3bb94902019-04-07 13:10:54 +01001178 UNIMPLEMENTED("VkBlendOp: %d", int(state.blendOperation));
Nicolas Capens68a82382018-10-02 13:16:55 -04001179 }
1180
1181 blendFactorAlpha(sourceFactor, current, pixel, state.sourceBlendFactorAlpha);
1182 blendFactorAlpha(destFactor, current, pixel, state.destBlendFactorAlpha);
1183
Alexis Hetu4ad23222018-11-22 16:40:52 -05001184 if(state.sourceBlendFactorAlpha != VK_BLEND_FACTOR_ONE && state.sourceBlendFactorAlpha != VK_BLEND_FACTOR_ZERO)
Nicolas Capens68a82382018-10-02 13:16:55 -04001185 {
1186 current.w = MulHigh(As<UShort4>(current.w), As<UShort4>(sourceFactor.w));
1187 }
1188
Alexis Hetu4ad23222018-11-22 16:40:52 -05001189 if(state.destBlendFactorAlpha != VK_BLEND_FACTOR_ONE && state.destBlendFactorAlpha != VK_BLEND_FACTOR_ZERO)
Nicolas Capens68a82382018-10-02 13:16:55 -04001190 {
1191 pixel.w = MulHigh(As<UShort4>(pixel.w), As<UShort4>(destFactor.w));
1192 }
1193
1194 switch(state.blendOperationAlpha)
1195 {
Alexis Hetu4ad23222018-11-22 16:40:52 -05001196 case VK_BLEND_OP_ADD:
Nicolas Capens68a82382018-10-02 13:16:55 -04001197 current.w = AddSat(As<UShort4>(current.w), As<UShort4>(pixel.w));
1198 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001199 case VK_BLEND_OP_SUBTRACT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001200 current.w = SubSat(As<UShort4>(current.w), As<UShort4>(pixel.w));
1201 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001202 case VK_BLEND_OP_REVERSE_SUBTRACT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001203 current.w = SubSat(As<UShort4>(pixel.w), As<UShort4>(current.w));
1204 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001205 case VK_BLEND_OP_MIN:
Nicolas Capens68a82382018-10-02 13:16:55 -04001206 current.w = Min(As<UShort4>(current.w), As<UShort4>(pixel.w));
1207 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001208 case VK_BLEND_OP_MAX:
Nicolas Capens68a82382018-10-02 13:16:55 -04001209 current.w = Max(As<UShort4>(current.w), As<UShort4>(pixel.w));
1210 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001211 case VK_BLEND_OP_SRC_EXT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001212 // No operation
1213 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001214 case VK_BLEND_OP_DST_EXT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001215 current.w = pixel.w;
1216 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001217 case VK_BLEND_OP_ZERO_EXT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001218 current.w = Short4(0x0000);
1219 break;
1220 default:
Ben Clayton3bb94902019-04-07 13:10:54 +01001221 UNIMPLEMENTED("VkBlendOp: %d", int(state.blendOperationAlpha));
Nicolas Capens68a82382018-10-02 13:16:55 -04001222 }
1223 }
1224
Nicolas Capens68a82382018-10-02 13:16:55 -04001225 void PixelRoutine::writeColor(int index, Pointer<Byte> &cBuffer, Int &x, Vector4s &current, Int &sMask, Int &zMask, Int &cMask)
1226 {
Chris Forbes37f2bd82019-04-19 17:24:36 -07001227 if(postBlendSRGB || isSRGB(index))
Nicolas Capens68a82382018-10-02 13:16:55 -04001228 {
1229 linearToSRGB16_12_16(current);
1230 }
1231
1232 if(exactColorRounding)
1233 {
1234 switch(state.targetFormat[index])
1235 {
Alexis Hetudd152e12018-11-14 13:39:28 -05001236 case VK_FORMAT_R5G6B5_UNORM_PACK16:
Nicolas Capens68a82382018-10-02 13:16:55 -04001237 current.x = AddSat(As<UShort4>(current.x), UShort4(0x0400));
1238 current.y = AddSat(As<UShort4>(current.y), UShort4(0x0200));
1239 current.z = AddSat(As<UShort4>(current.z), UShort4(0x0400));
1240 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001241 case VK_FORMAT_B8G8R8A8_UNORM:
Chris Forbes6407c1a2019-04-15 17:22:57 -07001242 case VK_FORMAT_B8G8R8A8_SRGB:
Alexis Hetudd152e12018-11-14 13:39:28 -05001243 case VK_FORMAT_R8G8B8A8_UNORM:
1244 case VK_FORMAT_R8G8B8A8_SRGB:
1245 case VK_FORMAT_R8G8_UNORM:
1246 case VK_FORMAT_R8_UNORM:
Chris Forbes6407c1a2019-04-15 17:22:57 -07001247 case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
1248 case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
Nicolas Capens68a82382018-10-02 13:16:55 -04001249 current.x = current.x - As<Short4>(As<UShort4>(current.x) >> 8) + Short4(0x0080);
1250 current.y = current.y - As<Short4>(As<UShort4>(current.y) >> 8) + Short4(0x0080);
1251 current.z = current.z - As<Short4>(As<UShort4>(current.z) >> 8) + Short4(0x0080);
1252 current.w = current.w - As<Short4>(As<UShort4>(current.w) >> 8) + Short4(0x0080);
1253 break;
1254 default:
1255 break;
1256 }
1257 }
1258
1259 int rgbaWriteMask = state.colorWriteActive(index);
1260 int bgraWriteMask = (rgbaWriteMask & 0x0000000A) | (rgbaWriteMask & 0x00000001) << 2 | (rgbaWriteMask & 0x00000004) >> 2;
1261
1262 switch(state.targetFormat[index])
1263 {
Alexis Hetudd152e12018-11-14 13:39:28 -05001264 case VK_FORMAT_R5G6B5_UNORM_PACK16:
Nicolas Capens68a82382018-10-02 13:16:55 -04001265 {
1266 current.x = current.x & Short4(0xF800u);
1267 current.y = As<UShort4>(current.y & Short4(0xFC00u)) >> 5;
1268 current.z = As<UShort4>(current.z) >> 11;
1269
1270 current.x = current.x | current.y | current.z;
1271 }
1272 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001273 case VK_FORMAT_B8G8R8A8_UNORM:
Chris Forbes6407c1a2019-04-15 17:22:57 -07001274 case VK_FORMAT_B8G8R8A8_SRGB:
Alexis Hetu8aa74a42018-10-22 14:54:09 -04001275 if(rgbaWriteMask == 0x7)
Nicolas Capens68a82382018-10-02 13:16:55 -04001276 {
1277 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1278 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1279 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1280
1281 current.z = As<Short4>(PackUnsigned(current.z, current.x));
1282 current.y = As<Short4>(PackUnsigned(current.y, current.y));
1283
1284 current.x = current.z;
1285 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1286 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1287 current.y = current.z;
1288 current.z = As<Short4>(UnpackLow(current.z, current.x));
1289 current.y = As<Short4>(UnpackHigh(current.y, current.x));
1290 }
1291 else
1292 {
1293 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1294 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1295 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1296 current.w = As<Short4>(As<UShort4>(current.w) >> 8);
1297
1298 current.z = As<Short4>(PackUnsigned(current.z, current.x));
1299 current.y = As<Short4>(PackUnsigned(current.y, current.w));
1300
1301 current.x = current.z;
1302 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1303 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1304 current.y = current.z;
1305 current.z = As<Short4>(UnpackLow(current.z, current.x));
1306 current.y = As<Short4>(UnpackHigh(current.y, current.x));
1307 }
1308 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001309 case VK_FORMAT_R8G8B8A8_UNORM:
1310 case VK_FORMAT_R8G8B8A8_SRGB:
Chris Forbes6407c1a2019-04-15 17:22:57 -07001311 case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
1312 case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
Alexis Hetu8aa74a42018-10-22 14:54:09 -04001313 if(rgbaWriteMask == 0x7)
Nicolas Capens68a82382018-10-02 13:16:55 -04001314 {
1315 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1316 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1317 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1318
1319 current.z = As<Short4>(PackUnsigned(current.x, current.z));
1320 current.y = As<Short4>(PackUnsigned(current.y, current.y));
1321
1322 current.x = current.z;
1323 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1324 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1325 current.y = current.z;
1326 current.z = As<Short4>(UnpackLow(current.z, current.x));
1327 current.y = As<Short4>(UnpackHigh(current.y, current.x));
1328 }
1329 else
1330 {
1331 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1332 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1333 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1334 current.w = As<Short4>(As<UShort4>(current.w) >> 8);
1335
1336 current.z = As<Short4>(PackUnsigned(current.x, current.z));
1337 current.y = As<Short4>(PackUnsigned(current.y, current.w));
1338
1339 current.x = current.z;
1340 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1341 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1342 current.y = current.z;
1343 current.z = As<Short4>(UnpackLow(current.z, current.x));
1344 current.y = As<Short4>(UnpackHigh(current.y, current.x));
1345 }
1346 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001347 case VK_FORMAT_R8G8_UNORM:
Nicolas Capens68a82382018-10-02 13:16:55 -04001348 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1349 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1350 current.x = As<Short4>(PackUnsigned(current.x, current.x));
1351 current.y = As<Short4>(PackUnsigned(current.y, current.y));
1352 current.x = UnpackLow(As<Byte8>(current.x), As<Byte8>(current.y));
1353 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001354 case VK_FORMAT_R8_UNORM:
Nicolas Capens68a82382018-10-02 13:16:55 -04001355 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1356 current.x = As<Short4>(PackUnsigned(current.x, current.x));
1357 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001358 case VK_FORMAT_R16G16_UNORM:
Nicolas Capens68a82382018-10-02 13:16:55 -04001359 current.z = current.x;
1360 current.x = As<Short4>(UnpackLow(current.x, current.y));
1361 current.z = As<Short4>(UnpackHigh(current.z, current.y));
1362 current.y = current.z;
1363 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001364 case VK_FORMAT_R16G16B16A16_UNORM:
Nicolas Capens68a82382018-10-02 13:16:55 -04001365 transpose4x4(current.x, current.y, current.z, current.w);
1366 break;
Ben Clayton3d7b7ea2019-05-14 16:49:58 +01001367 case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
1368 {
1369 auto r = Int4(current.x) & Int4(0x3ff);
1370 auto g = Int4(current.y) & Int4(0x3ff);
1371 auto b = Int4(current.z) & Int4(0x3ff);
1372 auto a = Int4(current.w) & Int4(0x3);
1373 Int4 packed = (a << 30) | (b << 20) | (g << 10) | r;
1374 auto c02 = As<Int2>(Int4(packed.xzzz)); // TODO: auto c02 = packed.xz;
1375 auto c13 = As<Int2>(Int4(packed.ywww)); // TODO: auto c13 = packed.yw;
1376 current.x = UnpackLow(c02, c13);
1377 current.y = UnpackHigh(c02, c13);
1378 break;
1379 }
Nicolas Capens68a82382018-10-02 13:16:55 -04001380 default:
Ben Clayton3bb94902019-04-07 13:10:54 +01001381 UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index]));
Nicolas Capens68a82382018-10-02 13:16:55 -04001382 }
1383
1384 Short4 c01 = current.z;
1385 Short4 c23 = current.y;
1386
1387 Int xMask; // Combination of all masks
1388
1389 if(state.depthTestActive)
1390 {
1391 xMask = zMask;
1392 }
1393 else
1394 {
1395 xMask = cMask;
1396 }
1397
1398 if(state.stencilActive)
1399 {
1400 xMask &= sMask;
1401 }
1402
1403 switch(state.targetFormat[index])
1404 {
Alexis Hetudd152e12018-11-14 13:39:28 -05001405 case VK_FORMAT_R5G6B5_UNORM_PACK16:
Nicolas Capens68a82382018-10-02 13:16:55 -04001406 {
1407 Pointer<Byte> buffer = cBuffer + 2 * x;
1408 Int value = *Pointer<Int>(buffer);
1409
1410 Int c01 = Extract(As<Int2>(current.x), 0);
1411
1412 if((bgraWriteMask & 0x00000007) != 0x00000007)
1413 {
1414 Int masked = value;
1415 c01 &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[bgraWriteMask & 0x7][0]));
1416 masked &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[~bgraWriteMask & 0x7][0]));
1417 c01 |= masked;
1418 }
1419
1420 c01 &= *Pointer<Int>(constants + OFFSET(Constants,maskW4Q[0][0]) + xMask * 8);
1421 value &= *Pointer<Int>(constants + OFFSET(Constants,invMaskW4Q[0][0]) + xMask * 8);
1422 c01 |= value;
1423 *Pointer<Int>(buffer) = c01;
1424
1425 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
1426 value = *Pointer<Int>(buffer);
1427
1428 Int c23 = Extract(As<Int2>(current.x), 1);
1429
1430 if((bgraWriteMask & 0x00000007) != 0x00000007)
1431 {
1432 Int masked = value;
1433 c23 &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[bgraWriteMask & 0x7][0]));
1434 masked &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[~bgraWriteMask & 0x7][0]));
1435 c23 |= masked;
1436 }
1437
1438 c23 &= *Pointer<Int>(constants + OFFSET(Constants,maskW4Q[0][2]) + xMask * 8);
1439 value &= *Pointer<Int>(constants + OFFSET(Constants,invMaskW4Q[0][2]) + xMask * 8);
1440 c23 |= value;
1441 *Pointer<Int>(buffer) = c23;
1442 }
1443 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001444 case VK_FORMAT_B8G8R8A8_UNORM:
Chris Forbes6407c1a2019-04-15 17:22:57 -07001445 case VK_FORMAT_B8G8R8A8_SRGB:
Nicolas Capens68a82382018-10-02 13:16:55 -04001446 {
1447 Pointer<Byte> buffer = cBuffer + x * 4;
1448 Short4 value = *Pointer<Short4>(buffer);
1449
Chris Forbes6407c1a2019-04-15 17:22:57 -07001450 if(bgraWriteMask != 0x0000000F) // FIXME: Need for masking when XRGB && Fh?
Nicolas Capens68a82382018-10-02 13:16:55 -04001451 {
1452 Short4 masked = value;
1453 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
1454 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0]));
1455 c01 |= masked;
1456 }
1457
1458 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8);
1459 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
1460 c01 |= value;
1461 *Pointer<Short4>(buffer) = c01;
1462
1463 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
1464 value = *Pointer<Short4>(buffer);
1465
Chris Forbes6407c1a2019-04-15 17:22:57 -07001466 if(bgraWriteMask != 0x0000000F) // FIXME: Need for masking when XRGB && Fh?
Nicolas Capens68a82382018-10-02 13:16:55 -04001467 {
1468 Short4 masked = value;
1469 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
1470 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0]));
1471 c23 |= masked;
1472 }
1473
1474 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8);
1475 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
1476 c23 |= value;
1477 *Pointer<Short4>(buffer) = c23;
1478 }
1479 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001480 case VK_FORMAT_R8G8B8A8_UNORM:
1481 case VK_FORMAT_R8G8B8A8_SRGB:
Chris Forbes6407c1a2019-04-15 17:22:57 -07001482 case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
1483 case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
Nicolas Capens68a82382018-10-02 13:16:55 -04001484 {
1485 Pointer<Byte> buffer = cBuffer + x * 4;
1486 Short4 value = *Pointer<Short4>(buffer);
1487
Chris Forbes6407c1a2019-04-15 17:22:57 -07001488 bool masked = (rgbaWriteMask != 0x0000000F); // FIXME: Need for masking when XBGR && Fh?
Nicolas Capens68a82382018-10-02 13:16:55 -04001489
1490 if(masked)
1491 {
1492 Short4 masked = value;
1493 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[rgbaWriteMask][0]));
1494 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[rgbaWriteMask][0]));
1495 c01 |= masked;
1496 }
1497
1498 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8);
1499 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
1500 c01 |= value;
1501 *Pointer<Short4>(buffer) = c01;
1502
1503 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
1504 value = *Pointer<Short4>(buffer);
1505
1506 if(masked)
1507 {
1508 Short4 masked = value;
1509 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[rgbaWriteMask][0]));
1510 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[rgbaWriteMask][0]));
1511 c23 |= masked;
1512 }
1513
1514 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8);
1515 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
1516 c23 |= value;
1517 *Pointer<Short4>(buffer) = c23;
1518 }
1519 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001520 case VK_FORMAT_R8G8_UNORM:
Nicolas Capens68a82382018-10-02 13:16:55 -04001521 if((rgbaWriteMask & 0x00000003) != 0x0)
1522 {
1523 Pointer<Byte> buffer = cBuffer + 2 * x;
1524 Int2 value;
1525 value = Insert(value, *Pointer<Int>(buffer), 0);
1526 Int pitch = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
1527 value = Insert(value, *Pointer<Int>(buffer + pitch), 1);
1528
1529 Int2 packedCol = As<Int2>(current.x);
1530
1531 UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q) + xMask * 8);
1532 if((rgbaWriteMask & 0x3) != 0x3)
1533 {
1534 Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskB4Q[5 * (rgbaWriteMask & 0x3)][0]));
1535 UInt2 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
1536 mergedMask &= rgbaMask;
1537 }
1538
1539 packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask));
1540
1541 *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0));
1542 *Pointer<UInt>(buffer + pitch) = As<UInt>(Extract(packedCol, 1));
1543 }
1544 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001545 case VK_FORMAT_R8_UNORM:
Nicolas Capens68a82382018-10-02 13:16:55 -04001546 if(rgbaWriteMask & 0x00000001)
1547 {
1548 Pointer<Byte> buffer = cBuffer + 1 * x;
1549 Short4 value;
1550 value = Insert(value, *Pointer<Short>(buffer), 0);
1551 Int pitch = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
1552 value = Insert(value, *Pointer<Short>(buffer + pitch), 1);
1553
1554 current.x &= *Pointer<Short4>(constants + OFFSET(Constants, maskB4Q) + 8 * xMask);
1555 value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask);
1556 current.x |= value;
1557
1558 *Pointer<Short>(buffer) = Extract(current.x, 0);
1559 *Pointer<Short>(buffer + pitch) = Extract(current.x, 1);
1560 }
1561 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001562 case VK_FORMAT_R16G16_UNORM:
Nicolas Capens68a82382018-10-02 13:16:55 -04001563 {
1564 Pointer<Byte> buffer = cBuffer + 4 * x;
1565
1566 Short4 value = *Pointer<Short4>(buffer);
1567
1568 if((rgbaWriteMask & 0x00000003) != 0x00000003)
1569 {
1570 Short4 masked = value;
1571 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0]));
1572 masked &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[~rgbaWriteMask & 0x3][0]));
1573 current.x |= masked;
1574 }
1575
1576 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8);
1577 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
1578 current.x |= value;
1579 *Pointer<Short4>(buffer) = current.x;
1580
1581 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
1582
1583 value = *Pointer<Short4>(buffer);
1584
1585 if((rgbaWriteMask & 0x00000003) != 0x00000003)
1586 {
1587 Short4 masked = value;
1588 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0]));
1589 masked &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[~rgbaWriteMask & 0x3][0]));
1590 current.y |= masked;
1591 }
1592
1593 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8);
1594 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
1595 current.y |= value;
1596 *Pointer<Short4>(buffer) = current.y;
1597 }
1598 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001599 case VK_FORMAT_R16G16B16A16_UNORM:
Nicolas Capens68a82382018-10-02 13:16:55 -04001600 {
1601 Pointer<Byte> buffer = cBuffer + 8 * x;
1602
1603 {
1604 Short4 value = *Pointer<Short4>(buffer);
1605
1606 if(rgbaWriteMask != 0x0000000F)
1607 {
1608 Short4 masked = value;
1609 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
1610 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
1611 current.x |= masked;
1612 }
1613
1614 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ0Q) + xMask * 8);
1615 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ0Q) + xMask * 8);
1616 current.x |= value;
1617 *Pointer<Short4>(buffer) = current.x;
1618 }
1619
1620 {
1621 Short4 value = *Pointer<Short4>(buffer + 8);
1622
1623 if(rgbaWriteMask != 0x0000000F)
1624 {
1625 Short4 masked = value;
1626 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
1627 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
1628 current.y |= masked;
1629 }
1630
1631 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ1Q) + xMask * 8);
1632 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ1Q) + xMask * 8);
1633 current.y |= value;
1634 *Pointer<Short4>(buffer + 8) = current.y;
1635 }
1636
1637 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
1638
1639 {
1640 Short4 value = *Pointer<Short4>(buffer);
1641
1642 if(rgbaWriteMask != 0x0000000F)
1643 {
1644 Short4 masked = value;
1645 current.z &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
1646 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
1647 current.z |= masked;
1648 }
1649
1650 current.z &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ2Q) + xMask * 8);
1651 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ2Q) + xMask * 8);
1652 current.z |= value;
1653 *Pointer<Short4>(buffer) = current.z;
1654 }
1655
1656 {
1657 Short4 value = *Pointer<Short4>(buffer + 8);
1658
1659 if(rgbaWriteMask != 0x0000000F)
1660 {
1661 Short4 masked = value;
1662 current.w &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
1663 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
1664 current.w |= masked;
1665 }
1666
1667 current.w &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ3Q) + xMask * 8);
1668 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ3Q) + xMask * 8);
1669 current.w |= value;
1670 *Pointer<Short4>(buffer + 8) = current.w;
1671 }
1672 }
1673 break;
Ben Clayton3d7b7ea2019-05-14 16:49:58 +01001674 case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
1675 {
1676 Pointer<Byte> buffer = cBuffer + 4 * x;
1677
1678 buffer = cBuffer + 4 * x;
1679 Int2 value = *Pointer<Int2>(buffer, 16);
1680 Int2 mergedMask = *Pointer<Int2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
1681 if (rgbaWriteMask != 0xF)
1682 {
1683 mergedMask &= *Pointer<Int2>(constants + OFFSET(Constants, mask10Q[rgbaWriteMask][0]));
1684 }
1685 *Pointer<Int2>(buffer) = (As<Int2>(current.x) & mergedMask) | (value & ~mergedMask);
1686
1687 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
1688
1689 value = *Pointer<Int2>(buffer, 16);
1690 mergedMask = *Pointer<Int2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
1691 if (rgbaWriteMask != 0xF)
1692 {
1693 mergedMask &= *Pointer<Int2>(constants + OFFSET(Constants, mask10Q[rgbaWriteMask][0]));
1694 }
1695 *Pointer<Int2>(buffer) = (As<Int2>(current.y) & mergedMask) | (value & ~mergedMask);
1696 }
1697 break;
Nicolas Capens68a82382018-10-02 13:16:55 -04001698 default:
Ben Clayton3bb94902019-04-07 13:10:54 +01001699 UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index]));
Nicolas Capens68a82382018-10-02 13:16:55 -04001700 }
1701 }
1702
Alexis Hetu4ad23222018-11-22 16:40:52 -05001703 void PixelRoutine::blendFactor(Vector4f &blendFactor, const Vector4f &oC, const Vector4f &pixel, VkBlendFactor blendFactorActive)
Nicolas Capens68a82382018-10-02 13:16:55 -04001704 {
1705 switch(blendFactorActive)
1706 {
Alexis Hetu4ad23222018-11-22 16:40:52 -05001707 case VK_BLEND_FACTOR_ZERO:
Chris Forbes4d659342019-05-10 13:40:00 -07001708 blendFactor.x = Float4(0);
1709 blendFactor.y = Float4(0);
1710 blendFactor.z = Float4(0);
Nicolas Capens68a82382018-10-02 13:16:55 -04001711 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001712 case VK_BLEND_FACTOR_ONE:
Chris Forbes4d659342019-05-10 13:40:00 -07001713 blendFactor.x = Float4(1);
1714 blendFactor.y = Float4(1);
1715 blendFactor.z = Float4(1);
Nicolas Capens68a82382018-10-02 13:16:55 -04001716 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001717 case VK_BLEND_FACTOR_SRC_COLOR:
Nicolas Capens68a82382018-10-02 13:16:55 -04001718 blendFactor.x = oC.x;
1719 blendFactor.y = oC.y;
1720 blendFactor.z = oC.z;
1721 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001722 case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
Nicolas Capens68a82382018-10-02 13:16:55 -04001723 blendFactor.x = Float4(1.0f) - oC.x;
1724 blendFactor.y = Float4(1.0f) - oC.y;
1725 blendFactor.z = Float4(1.0f) - oC.z;
1726 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001727 case VK_BLEND_FACTOR_DST_COLOR:
Nicolas Capens68a82382018-10-02 13:16:55 -04001728 blendFactor.x = pixel.x;
1729 blendFactor.y = pixel.y;
1730 blendFactor.z = pixel.z;
1731 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001732 case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
Nicolas Capens68a82382018-10-02 13:16:55 -04001733 blendFactor.x = Float4(1.0f) - pixel.x;
1734 blendFactor.y = Float4(1.0f) - pixel.y;
1735 blendFactor.z = Float4(1.0f) - pixel.z;
1736 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001737 case VK_BLEND_FACTOR_SRC_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -04001738 blendFactor.x = oC.w;
1739 blendFactor.y = oC.w;
1740 blendFactor.z = oC.w;
1741 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001742 case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -04001743 blendFactor.x = Float4(1.0f) - oC.w;
1744 blendFactor.y = Float4(1.0f) - oC.w;
1745 blendFactor.z = Float4(1.0f) - oC.w;
1746 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001747 case VK_BLEND_FACTOR_DST_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -04001748 blendFactor.x = pixel.w;
1749 blendFactor.y = pixel.w;
1750 blendFactor.z = pixel.w;
1751 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001752 case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -04001753 blendFactor.x = Float4(1.0f) - pixel.w;
1754 blendFactor.y = Float4(1.0f) - pixel.w;
1755 blendFactor.z = Float4(1.0f) - pixel.w;
1756 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001757 case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
Nicolas Capens68a82382018-10-02 13:16:55 -04001758 blendFactor.x = Float4(1.0f) - pixel.w;
1759 blendFactor.x = Min(blendFactor.x, oC.w);
1760 blendFactor.y = blendFactor.x;
1761 blendFactor.z = blendFactor.x;
1762 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001763 case VK_BLEND_FACTOR_CONSTANT_COLOR:
Nicolas Capens68a82382018-10-02 13:16:55 -04001764 blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[0]));
1765 blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[1]));
1766 blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[2]));
1767 break;
Ben Clayton8ab40532019-05-10 16:23:13 +01001768 case VK_BLEND_FACTOR_CONSTANT_ALPHA:
1769 blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[3]));
1770 blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[3]));
1771 blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[3]));
1772 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001773 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
Nicolas Capens68a82382018-10-02 13:16:55 -04001774 blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[0]));
1775 blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[1]));
1776 blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[2]));
1777 break;
Ben Clayton8ab40532019-05-10 16:23:13 +01001778 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
1779 blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[3]));
1780 blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[3]));
1781 blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[3]));
1782 break;
1783
Nicolas Capens68a82382018-10-02 13:16:55 -04001784 default:
Ben Clayton3bb94902019-04-07 13:10:54 +01001785 UNIMPLEMENTED("VkBlendFactor: %d", int(blendFactorActive));
Nicolas Capens68a82382018-10-02 13:16:55 -04001786 }
1787 }
1788
Alexis Hetu4ad23222018-11-22 16:40:52 -05001789 void PixelRoutine::blendFactorAlpha(Vector4f &blendFactor, const Vector4f &oC, const Vector4f &pixel, VkBlendFactor blendFactorAlphaActive)
Nicolas Capens68a82382018-10-02 13:16:55 -04001790 {
1791 switch(blendFactorAlphaActive)
1792 {
Alexis Hetu4ad23222018-11-22 16:40:52 -05001793 case VK_BLEND_FACTOR_ZERO:
Chris Forbes4d659342019-05-10 13:40:00 -07001794 blendFactor.w = Float4(0);
Nicolas Capens68a82382018-10-02 13:16:55 -04001795 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001796 case VK_BLEND_FACTOR_ONE:
Chris Forbes4d659342019-05-10 13:40:00 -07001797 blendFactor.w = Float4(1);
Nicolas Capens68a82382018-10-02 13:16:55 -04001798 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001799 case VK_BLEND_FACTOR_SRC_COLOR:
Nicolas Capens68a82382018-10-02 13:16:55 -04001800 blendFactor.w = oC.w;
1801 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001802 case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
Nicolas Capens68a82382018-10-02 13:16:55 -04001803 blendFactor.w = Float4(1.0f) - oC.w;
1804 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001805 case VK_BLEND_FACTOR_DST_COLOR:
Nicolas Capens68a82382018-10-02 13:16:55 -04001806 blendFactor.w = pixel.w;
1807 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001808 case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
Nicolas Capens68a82382018-10-02 13:16:55 -04001809 blendFactor.w = Float4(1.0f) - pixel.w;
1810 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001811 case VK_BLEND_FACTOR_SRC_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -04001812 blendFactor.w = oC.w;
1813 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001814 case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -04001815 blendFactor.w = Float4(1.0f) - oC.w;
1816 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001817 case VK_BLEND_FACTOR_DST_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -04001818 blendFactor.w = pixel.w;
1819 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001820 case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -04001821 blendFactor.w = Float4(1.0f) - pixel.w;
1822 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001823 case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
Nicolas Capens68a82382018-10-02 13:16:55 -04001824 blendFactor.w = Float4(1.0f);
1825 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001826 case VK_BLEND_FACTOR_CONSTANT_COLOR:
Ben Clayton8ab40532019-05-10 16:23:13 +01001827 case VK_BLEND_FACTOR_CONSTANT_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -04001828 blendFactor.w = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[3]));
1829 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001830 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
Ben Clayton8ab40532019-05-10 16:23:13 +01001831 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -04001832 blendFactor.w = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[3]));
1833 break;
1834 default:
Ben Clayton3bb94902019-04-07 13:10:54 +01001835 UNIMPLEMENTED("VkBlendFactor: %d", int(blendFactorAlphaActive));
Nicolas Capens68a82382018-10-02 13:16:55 -04001836 }
1837 }
1838
1839 void PixelRoutine::alphaBlend(int index, Pointer<Byte> &cBuffer, Vector4f &oC, Int &x)
1840 {
1841 if(!state.alphaBlendActive)
1842 {
1843 return;
1844 }
1845
1846 Pointer<Byte> buffer;
Ben Clayton8ab40532019-05-10 16:23:13 +01001847
1848 // pixel holds four texel color values.
1849 // Note: Despite the type being Vector4f, the colors may be stored as
1850 // integers. Half-floats are stored as full 32-bit floats.
1851 // Non-float and non-fixed point formats are not alpha blended.
Nicolas Capens68a82382018-10-02 13:16:55 -04001852 Vector4f pixel;
1853
1854 Vector4s color;
1855 Short4 c01;
1856 Short4 c23;
1857
1858 Float4 one;
Alexis Hetu25ec7b02019-03-12 14:19:22 -04001859 vk::Format format(state.targetFormat[index]);
1860 if(format.isFloatFormat())
Nicolas Capens68a82382018-10-02 13:16:55 -04001861 {
1862 one = Float4(1.0f);
1863 }
Alexis Hetu25ec7b02019-03-12 14:19:22 -04001864 else if(format.isNonNormalizedInteger())
Nicolas Capens68a82382018-10-02 13:16:55 -04001865 {
Alexis Hetu25ec7b02019-03-12 14:19:22 -04001866 one = As<Float4>(format.isUnsignedComponent(0) ? Int4(0xFFFFFFFF) : Int4(0x7FFFFFFF));
Nicolas Capens68a82382018-10-02 13:16:55 -04001867 }
1868
1869 switch(state.targetFormat[index])
1870 {
Alexis Hetudd152e12018-11-14 13:39:28 -05001871 case VK_FORMAT_R32_SINT:
1872 case VK_FORMAT_R32_UINT:
1873 case VK_FORMAT_R32_SFLOAT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001874 buffer = cBuffer;
1875 // FIXME: movlps
1876 pixel.x.x = *Pointer<Float>(buffer + 4 * x + 0);
1877 pixel.x.y = *Pointer<Float>(buffer + 4 * x + 4);
1878 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
1879 // FIXME: movhps
1880 pixel.x.z = *Pointer<Float>(buffer + 4 * x + 0);
1881 pixel.x.w = *Pointer<Float>(buffer + 4 * x + 4);
1882 pixel.y = pixel.z = pixel.w = one;
1883 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001884 case VK_FORMAT_R32G32_SINT:
1885 case VK_FORMAT_R32G32_UINT:
1886 case VK_FORMAT_R32G32_SFLOAT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001887 buffer = cBuffer;
1888 pixel.x = *Pointer<Float4>(buffer + 8 * x, 16);
1889 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
1890 pixel.y = *Pointer<Float4>(buffer + 8 * x, 16);
1891 pixel.z = pixel.x;
1892 pixel.x = ShuffleLowHigh(pixel.x, pixel.y, 0x88);
1893 pixel.z = ShuffleLowHigh(pixel.z, pixel.y, 0xDD);
1894 pixel.y = pixel.z;
1895 pixel.z = pixel.w = one;
1896 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001897 case VK_FORMAT_R32G32B32A32_SFLOAT:
1898 case VK_FORMAT_R32G32B32A32_SINT:
1899 case VK_FORMAT_R32G32B32A32_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001900 buffer = cBuffer;
1901 pixel.x = *Pointer<Float4>(buffer + 16 * x, 16);
1902 pixel.y = *Pointer<Float4>(buffer + 16 * x + 16, 16);
1903 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
1904 pixel.z = *Pointer<Float4>(buffer + 16 * x, 16);
1905 pixel.w = *Pointer<Float4>(buffer + 16 * x + 16, 16);
1906 transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w);
Nicolas Capens68a82382018-10-02 13:16:55 -04001907 break;
Ben Clayton8ab40532019-05-10 16:23:13 +01001908 case VK_FORMAT_R16_SFLOAT:
1909 buffer = cBuffer;
1910 pixel.x.x = Float(*Pointer<Half>(buffer + 2 * x + 0));
1911 pixel.x.y = Float(*Pointer<Half>(buffer + 2 * x + 2));
1912 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
1913 pixel.x.z = Float(*Pointer<Half>(buffer + 2 * x + 0));
1914 pixel.x.w = Float(*Pointer<Half>(buffer + 2 * x + 2));
1915 pixel.y = pixel.z = pixel.w = one;
1916 break;
1917 case VK_FORMAT_R16G16_SFLOAT:
1918 buffer = cBuffer;
1919 pixel.x.x = Float(*Pointer<Half>(buffer + 4 * x + 0));
1920 pixel.y.x = Float(*Pointer<Half>(buffer + 4 * x + 2));
1921 pixel.x.y = Float(*Pointer<Half>(buffer + 4 * x + 4));
1922 pixel.y.y = Float(*Pointer<Half>(buffer + 4 * x + 6));
1923 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
1924 pixel.x.z = Float(*Pointer<Half>(buffer + 4 * x + 0));
1925 pixel.y.z = Float(*Pointer<Half>(buffer + 4 * x + 2));
1926 pixel.x.w = Float(*Pointer<Half>(buffer + 4 * x + 4));
1927 pixel.y.w = Float(*Pointer<Half>(buffer + 4 * x + 6));
1928 pixel.z = pixel.w = one;
1929 break;
1930 case VK_FORMAT_R16G16B16A16_SFLOAT:
1931 buffer = cBuffer;
1932 pixel.x.x = Float(*Pointer<Half>(buffer + 8 * x + 0x0));
1933 pixel.y.x = Float(*Pointer<Half>(buffer + 8 * x + 0x2));
1934 pixel.z.x = Float(*Pointer<Half>(buffer + 8 * x + 0x4));
1935 pixel.w.x = Float(*Pointer<Half>(buffer + 8 * x + 0x6));
1936 pixel.x.y = Float(*Pointer<Half>(buffer + 8 * x + 0x8));
1937 pixel.y.y = Float(*Pointer<Half>(buffer + 8 * x + 0xa));
1938 pixel.z.y = Float(*Pointer<Half>(buffer + 8 * x + 0xc));
1939 pixel.w.y = Float(*Pointer<Half>(buffer + 8 * x + 0xe));
1940 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
1941 pixel.x.z = Float(*Pointer<Half>(buffer + 8 * x + 0x0));
1942 pixel.y.z = Float(*Pointer<Half>(buffer + 8 * x + 0x2));
1943 pixel.z.z = Float(*Pointer<Half>(buffer + 8 * x + 0x4));
1944 pixel.w.z = Float(*Pointer<Half>(buffer + 8 * x + 0x6));
1945 pixel.x.w = Float(*Pointer<Half>(buffer + 8 * x + 0x8));
1946 pixel.y.w = Float(*Pointer<Half>(buffer + 8 * x + 0xa));
1947 pixel.z.w = Float(*Pointer<Half>(buffer + 8 * x + 0xc));
1948 pixel.w.w = Float(*Pointer<Half>(buffer + 8 * x + 0xe));
1949 break;
Nicolas Capens68a82382018-10-02 13:16:55 -04001950 default:
Ben Clayton3bb94902019-04-07 13:10:54 +01001951 UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index]));
Nicolas Capens68a82382018-10-02 13:16:55 -04001952 }
1953
Chris Forbes37f2bd82019-04-19 17:24:36 -07001954 if(postBlendSRGB || isSRGB(index))
Nicolas Capens68a82382018-10-02 13:16:55 -04001955 {
1956 sRGBtoLinear(pixel.x);
1957 sRGBtoLinear(pixel.y);
1958 sRGBtoLinear(pixel.z);
1959 }
1960
1961 // Final Color = ObjectColor * SourceBlendFactor + PixelColor * DestinationBlendFactor
1962 Vector4f sourceFactor;
1963 Vector4f destFactor;
1964
1965 blendFactor(sourceFactor, oC, pixel, state.sourceBlendFactor);
1966 blendFactor(destFactor, oC, pixel, state.destBlendFactor);
1967
Chris Forbes4d659342019-05-10 13:40:00 -07001968 oC.x *= sourceFactor.x;
1969 oC.y *= sourceFactor.y;
1970 oC.z *= sourceFactor.z;
Nicolas Capens68a82382018-10-02 13:16:55 -04001971
Chris Forbes4d659342019-05-10 13:40:00 -07001972 pixel.x *= destFactor.x;
1973 pixel.y *= destFactor.y;
1974 pixel.z *= destFactor.z;
Nicolas Capens68a82382018-10-02 13:16:55 -04001975
1976 switch(state.blendOperation)
1977 {
Alexis Hetu4ad23222018-11-22 16:40:52 -05001978 case VK_BLEND_OP_ADD:
Nicolas Capens68a82382018-10-02 13:16:55 -04001979 oC.x += pixel.x;
1980 oC.y += pixel.y;
1981 oC.z += pixel.z;
1982 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001983 case VK_BLEND_OP_SUBTRACT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001984 oC.x -= pixel.x;
1985 oC.y -= pixel.y;
1986 oC.z -= pixel.z;
1987 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001988 case VK_BLEND_OP_REVERSE_SUBTRACT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001989 oC.x = pixel.x - oC.x;
1990 oC.y = pixel.y - oC.y;
1991 oC.z = pixel.z - oC.z;
1992 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001993 case VK_BLEND_OP_MIN:
Nicolas Capens68a82382018-10-02 13:16:55 -04001994 oC.x = Min(oC.x, pixel.x);
1995 oC.y = Min(oC.y, pixel.y);
1996 oC.z = Min(oC.z, pixel.z);
1997 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001998 case VK_BLEND_OP_MAX:
Nicolas Capens68a82382018-10-02 13:16:55 -04001999 oC.x = Max(oC.x, pixel.x);
2000 oC.y = Max(oC.y, pixel.y);
2001 oC.z = Max(oC.z, pixel.z);
2002 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05002003 case VK_BLEND_OP_SRC_EXT:
Nicolas Capens68a82382018-10-02 13:16:55 -04002004 // No operation
2005 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05002006 case VK_BLEND_OP_DST_EXT:
Nicolas Capens68a82382018-10-02 13:16:55 -04002007 oC.x = pixel.x;
2008 oC.y = pixel.y;
2009 oC.z = pixel.z;
2010 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05002011 case VK_BLEND_OP_ZERO_EXT:
Nicolas Capens68a82382018-10-02 13:16:55 -04002012 oC.x = Float4(0.0f);
2013 oC.y = Float4(0.0f);
2014 oC.z = Float4(0.0f);
2015 break;
2016 default:
Ben Clayton3bb94902019-04-07 13:10:54 +01002017 UNIMPLEMENTED("VkBlendOp: %d", int(state.blendOperation));
Nicolas Capens68a82382018-10-02 13:16:55 -04002018 }
2019
2020 blendFactorAlpha(sourceFactor, oC, pixel, state.sourceBlendFactorAlpha);
2021 blendFactorAlpha(destFactor, oC, pixel, state.destBlendFactorAlpha);
2022
Chris Forbes4d659342019-05-10 13:40:00 -07002023 oC.w *= sourceFactor.w;
2024 pixel.w *= destFactor.w;
Nicolas Capens68a82382018-10-02 13:16:55 -04002025
2026 switch(state.blendOperationAlpha)
2027 {
Alexis Hetu4ad23222018-11-22 16:40:52 -05002028 case VK_BLEND_OP_ADD:
Nicolas Capens68a82382018-10-02 13:16:55 -04002029 oC.w += pixel.w;
2030 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05002031 case VK_BLEND_OP_SUBTRACT:
Nicolas Capens68a82382018-10-02 13:16:55 -04002032 oC.w -= pixel.w;
2033 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05002034 case VK_BLEND_OP_REVERSE_SUBTRACT:
Nicolas Capens68a82382018-10-02 13:16:55 -04002035 pixel.w -= oC.w;
2036 oC.w = pixel.w;
2037 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05002038 case VK_BLEND_OP_MIN:
Nicolas Capens68a82382018-10-02 13:16:55 -04002039 oC.w = Min(oC.w, pixel.w);
2040 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05002041 case VK_BLEND_OP_MAX:
Nicolas Capens68a82382018-10-02 13:16:55 -04002042 oC.w = Max(oC.w, pixel.w);
2043 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05002044 case VK_BLEND_OP_SRC_EXT:
Nicolas Capens68a82382018-10-02 13:16:55 -04002045 // No operation
2046 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05002047 case VK_BLEND_OP_DST_EXT:
Nicolas Capens68a82382018-10-02 13:16:55 -04002048 oC.w = pixel.w;
2049 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05002050 case VK_BLEND_OP_ZERO_EXT:
Nicolas Capens68a82382018-10-02 13:16:55 -04002051 oC.w = Float4(0.0f);
2052 break;
2053 default:
Ben Clayton3bb94902019-04-07 13:10:54 +01002054 UNIMPLEMENTED("VkBlendOp: %d", int(state.blendOperationAlpha));
Nicolas Capens68a82382018-10-02 13:16:55 -04002055 }
2056 }
2057
2058 void PixelRoutine::writeColor(int index, Pointer<Byte> &cBuffer, Int &x, Vector4f &oC, Int &sMask, Int &zMask, Int &cMask)
2059 {
2060 switch(state.targetFormat[index])
2061 {
Ben Clayton8ab40532019-05-10 16:23:13 +01002062 case VK_FORMAT_R16_SFLOAT:
Alexis Hetudd152e12018-11-14 13:39:28 -05002063 case VK_FORMAT_R32_SFLOAT:
2064 case VK_FORMAT_R32_SINT:
2065 case VK_FORMAT_R32_UINT:
2066 case VK_FORMAT_R16_SINT:
2067 case VK_FORMAT_R16_UINT:
2068 case VK_FORMAT_R8_SINT:
2069 case VK_FORMAT_R8_UINT:
Chris Forbesb0f37162019-05-03 07:25:58 -07002070 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
Nicolas Capens68a82382018-10-02 13:16:55 -04002071 break;
Ben Clayton8ab40532019-05-10 16:23:13 +01002072 case VK_FORMAT_R16G16_SFLOAT:
Alexis Hetudd152e12018-11-14 13:39:28 -05002073 case VK_FORMAT_R32G32_SFLOAT:
2074 case VK_FORMAT_R32G32_SINT:
2075 case VK_FORMAT_R32G32_UINT:
2076 case VK_FORMAT_R16G16_SINT:
2077 case VK_FORMAT_R16G16_UINT:
2078 case VK_FORMAT_R8G8_SINT:
2079 case VK_FORMAT_R8G8_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04002080 oC.z = oC.x;
2081 oC.x = UnpackLow(oC.x, oC.y);
2082 oC.z = UnpackHigh(oC.z, oC.y);
2083 oC.y = oC.z;
2084 break;
Ben Clayton8ab40532019-05-10 16:23:13 +01002085 case VK_FORMAT_R16G16B16A16_SFLOAT:
Alexis Hetudd152e12018-11-14 13:39:28 -05002086 case VK_FORMAT_R32G32B32A32_SFLOAT:
2087 case VK_FORMAT_R32G32B32A32_SINT:
2088 case VK_FORMAT_R32G32B32A32_UINT:
2089 case VK_FORMAT_R16G16B16A16_SINT:
2090 case VK_FORMAT_R16G16B16A16_UINT:
2091 case VK_FORMAT_R8G8B8A8_SINT:
2092 case VK_FORMAT_R8G8B8A8_UINT:
Chris Forbes6407c1a2019-04-15 17:22:57 -07002093 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
2094 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
Nicolas Capens68a82382018-10-02 13:16:55 -04002095 transpose4x4(oC.x, oC.y, oC.z, oC.w);
2096 break;
2097 default:
Ben Clayton3bb94902019-04-07 13:10:54 +01002098 UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index]));
Nicolas Capens68a82382018-10-02 13:16:55 -04002099 }
2100
2101 int rgbaWriteMask = state.colorWriteActive(index);
2102
2103 Int xMask; // Combination of all masks
2104
2105 if(state.depthTestActive)
2106 {
2107 xMask = zMask;
2108 }
2109 else
2110 {
2111 xMask = cMask;
2112 }
2113
2114 if(state.stencilActive)
2115 {
2116 xMask &= sMask;
2117 }
2118
Ben Clayton8ab40532019-05-10 16:23:13 +01002119 auto targetFormat = state.targetFormat[index];
2120
Nicolas Capens68a82382018-10-02 13:16:55 -04002121 Pointer<Byte> buffer;
2122 Float4 value;
2123
Ben Clayton8ab40532019-05-10 16:23:13 +01002124 switch(targetFormat)
Nicolas Capens68a82382018-10-02 13:16:55 -04002125 {
Alexis Hetudd152e12018-11-14 13:39:28 -05002126 case VK_FORMAT_R32_SFLOAT:
2127 case VK_FORMAT_R32_SINT:
2128 case VK_FORMAT_R32_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04002129 if(rgbaWriteMask & 0x00000001)
2130 {
2131 buffer = cBuffer + 4 * x;
2132
2133 // FIXME: movlps
2134 value.x = *Pointer<Float>(buffer + 0);
2135 value.y = *Pointer<Float>(buffer + 4);
2136
2137 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
2138
2139 // FIXME: movhps
2140 value.z = *Pointer<Float>(buffer + 0);
2141 value.w = *Pointer<Float>(buffer + 4);
2142
2143 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X) + xMask * 16, 16));
2144 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X) + xMask * 16, 16));
2145 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
2146
2147 // FIXME: movhps
2148 *Pointer<Float>(buffer + 0) = oC.x.z;
2149 *Pointer<Float>(buffer + 4) = oC.x.w;
2150
2151 buffer -= *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
2152
2153 // FIXME: movlps
2154 *Pointer<Float>(buffer + 0) = oC.x.x;
2155 *Pointer<Float>(buffer + 4) = oC.x.y;
2156 }
2157 break;
Ben Clayton8ab40532019-05-10 16:23:13 +01002158 case VK_FORMAT_R16_SFLOAT:
2159 if(rgbaWriteMask & 0x00000001)
2160 {
2161 buffer = cBuffer + 2 * x;
2162
2163 value = Insert(value, Float(*Pointer<Half>(buffer + 0)), 0);
2164 value = Insert(value, Float(*Pointer<Half>(buffer + 2)), 1);
2165
2166 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2167
2168 value = Insert(value, Float(*Pointer<Half>(buffer + 0)), 2);
2169 value = Insert(value, Float(*Pointer<Half>(buffer + 2)), 3);
2170
2171 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + xMask * 16, 16));
2172 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + xMask * 16, 16));
2173 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
2174
2175 *Pointer<Half>(buffer + 0) = Half(oC.x.z);
2176 *Pointer<Half>(buffer + 2) = Half(oC.x.w);
2177
2178 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2179
2180 *Pointer<Half>(buffer + 0) = Half(oC.x.x);
2181 *Pointer<Half>(buffer + 2) = Half(oC.x.y);
2182 }
2183 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05002184 case VK_FORMAT_R16_SINT:
2185 case VK_FORMAT_R16_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04002186 if(rgbaWriteMask & 0x00000001)
2187 {
2188 buffer = cBuffer + 2 * x;
2189
2190 UShort4 xyzw;
2191 xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 0));
2192
2193 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2194
2195 xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 1));
2196 value = As<Float4>(Int4(xyzw));
2197
2198 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + xMask * 16, 16));
2199 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + xMask * 16, 16));
2200 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
2201
Ben Clayton8ab40532019-05-10 16:23:13 +01002202 if(targetFormat == VK_FORMAT_R16_SINT)
Nicolas Capens68a82382018-10-02 13:16:55 -04002203 {
2204 Float component = oC.x.z;
2205 *Pointer<Short>(buffer + 0) = Short(As<Int>(component));
2206 component = oC.x.w;
2207 *Pointer<Short>(buffer + 2) = Short(As<Int>(component));
2208
2209 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2210
2211 component = oC.x.x;
2212 *Pointer<Short>(buffer + 0) = Short(As<Int>(component));
2213 component = oC.x.y;
2214 *Pointer<Short>(buffer + 2) = Short(As<Int>(component));
2215 }
Alexis Hetudd152e12018-11-14 13:39:28 -05002216 else // VK_FORMAT_R16_UINT
Nicolas Capens68a82382018-10-02 13:16:55 -04002217 {
2218 Float component = oC.x.z;
2219 *Pointer<UShort>(buffer + 0) = UShort(As<Int>(component));
2220 component = oC.x.w;
2221 *Pointer<UShort>(buffer + 2) = UShort(As<Int>(component));
2222
2223 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2224
2225 component = oC.x.x;
2226 *Pointer<UShort>(buffer + 0) = UShort(As<Int>(component));
2227 component = oC.x.y;
2228 *Pointer<UShort>(buffer + 2) = UShort(As<Int>(component));
2229 }
2230 }
2231 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05002232 case VK_FORMAT_R8_SINT:
2233 case VK_FORMAT_R8_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04002234 if(rgbaWriteMask & 0x00000001)
2235 {
2236 buffer = cBuffer + x;
2237
2238 UInt xyzw, packedCol;
2239
2240 xyzw = UInt(*Pointer<UShort>(buffer)) & 0xFFFF;
2241 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2242 xyzw |= UInt(*Pointer<UShort>(buffer)) << 16;
2243
2244 Short4 tmpCol = Short4(As<Int4>(oC.x));
Ben Clayton8ab40532019-05-10 16:23:13 +01002245 if(targetFormat == VK_FORMAT_R8_SINT)
Nicolas Capens68a82382018-10-02 13:16:55 -04002246 {
2247 tmpCol = As<Short4>(PackSigned(tmpCol, tmpCol));
2248 }
2249 else
2250 {
2251 tmpCol = As<Short4>(PackUnsigned(tmpCol, tmpCol));
2252 }
2253 packedCol = Extract(As<Int2>(tmpCol), 0);
2254
2255 packedCol = (packedCol & *Pointer<UInt>(constants + OFFSET(Constants, maskB4Q) + 8 * xMask)) |
2256 (xyzw & *Pointer<UInt>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask));
2257
2258 *Pointer<UShort>(buffer) = UShort(packedCol >> 16);
2259 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2260 *Pointer<UShort>(buffer) = UShort(packedCol);
2261 }
2262 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05002263 case VK_FORMAT_R32G32_SFLOAT:
2264 case VK_FORMAT_R32G32_SINT:
2265 case VK_FORMAT_R32G32_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04002266 buffer = cBuffer + 8 * x;
2267
2268 value = *Pointer<Float4>(buffer);
2269
2270 if((rgbaWriteMask & 0x00000003) != 0x00000003)
2271 {
2272 Float4 masked = value;
2273 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0])));
2274 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[~rgbaWriteMask & 0x3][0])));
2275 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked));
2276 }
2277
2278 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskQ01X) + xMask * 16, 16));
2279 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskQ01X) + xMask * 16, 16));
2280 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
2281 *Pointer<Float4>(buffer) = oC.x;
2282
2283 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
2284
2285 value = *Pointer<Float4>(buffer);
2286
2287 if((rgbaWriteMask & 0x00000003) != 0x00000003)
2288 {
2289 Float4 masked;
2290
2291 masked = value;
2292 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0])));
2293 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[~rgbaWriteMask & 0x3][0])));
2294 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(masked));
2295 }
2296
2297 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskQ23X) + xMask * 16, 16));
2298 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskQ23X) + xMask * 16, 16));
2299 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value));
2300 *Pointer<Float4>(buffer) = oC.y;
2301 break;
Ben Clayton8ab40532019-05-10 16:23:13 +01002302 case VK_FORMAT_R16G16_SFLOAT:
2303 if((rgbaWriteMask & 0x00000003) != 0x0)
2304 {
2305 buffer = cBuffer + 4 * x;
2306
2307 UInt2 rgbaMask;
2308 UInt2 packedCol;
2309 packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.x.y))) << 16) | UInt(As<UShort>(Half(oC.x.x))), 0);
2310 packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.x.w))) << 16) | UInt(As<UShort>(Half(oC.x.z))), 1);
2311
2312 UShort4 value = *Pointer<UShort4>(buffer);
2313 UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
2314 if((rgbaWriteMask & 0x3) != 0x3)
2315 {
2316 Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask & 0x3][0]));
2317 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
2318 mergedMask &= rgbaMask;
2319 }
2320 *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (As<UInt2>(value) & ~mergedMask);
2321
2322 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2323
2324 packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.y.y))) << 16) | UInt(As<UShort>(Half(oC.y.x))), 0);
2325 packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.y.w))) << 16) | UInt(As<UShort>(Half(oC.y.z))), 1);
2326 value = *Pointer<UShort4>(buffer);
2327 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
2328 if((rgbaWriteMask & 0x3) != 0x3)
2329 {
2330 mergedMask &= rgbaMask;
2331 }
2332 *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (As<UInt2>(value) & ~mergedMask);
2333 }
2334 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05002335 case VK_FORMAT_R16G16_SINT:
2336 case VK_FORMAT_R16G16_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04002337 if((rgbaWriteMask & 0x00000003) != 0x0)
2338 {
2339 buffer = cBuffer + 4 * x;
2340
2341 UInt2 rgbaMask;
2342 UShort4 packedCol = UShort4(As<Int4>(oC.x));
2343 UShort4 value = *Pointer<UShort4>(buffer);
2344 UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
2345 if((rgbaWriteMask & 0x3) != 0x3)
2346 {
2347 Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask & 0x3][0]));
2348 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
2349 mergedMask &= rgbaMask;
2350 }
2351 *Pointer<UInt2>(buffer) = (As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask);
2352
2353 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2354
2355 packedCol = UShort4(As<Int4>(oC.y));
2356 value = *Pointer<UShort4>(buffer);
2357 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
2358 if((rgbaWriteMask & 0x3) != 0x3)
2359 {
2360 mergedMask &= rgbaMask;
2361 }
2362 *Pointer<UInt2>(buffer) = (As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask);
2363 }
2364 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05002365 case VK_FORMAT_R8G8_SINT:
2366 case VK_FORMAT_R8G8_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04002367 if((rgbaWriteMask & 0x00000003) != 0x0)
2368 {
2369 buffer = cBuffer + 2 * x;
2370
2371 Int2 xyzw, packedCol;
2372
2373 xyzw = Insert(xyzw, *Pointer<Int>(buffer), 0);
2374 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2375 xyzw = Insert(xyzw, *Pointer<Int>(buffer), 1);
2376
Ben Clayton8ab40532019-05-10 16:23:13 +01002377 if(targetFormat == VK_FORMAT_R8G8_SINT)
Nicolas Capens68a82382018-10-02 13:16:55 -04002378 {
2379 packedCol = As<Int2>(PackSigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
2380 }
2381 else
2382 {
2383 packedCol = As<Int2>(PackUnsigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
2384 }
2385
2386 UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q) + xMask * 8);
2387 if((rgbaWriteMask & 0x3) != 0x3)
2388 {
2389 Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskB4Q[5 * (rgbaWriteMask & 0x3)][0]));
2390 UInt2 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
2391 mergedMask &= rgbaMask;
2392 }
2393
2394 packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(xyzw) & ~mergedMask));
2395
2396 *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 1));
2397 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2398 *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0));
2399 }
2400 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05002401 case VK_FORMAT_R32G32B32A32_SFLOAT:
2402 case VK_FORMAT_R32G32B32A32_SINT:
2403 case VK_FORMAT_R32G32B32A32_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04002404 buffer = cBuffer + 16 * x;
2405
2406 {
2407 value = *Pointer<Float4>(buffer, 16);
2408
2409 if(rgbaWriteMask != 0x0000000F)
2410 {
2411 Float4 masked = value;
2412 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
2413 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
2414 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked));
2415 }
2416
2417 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskX0X) + xMask * 16, 16));
2418 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX0X) + xMask * 16, 16));
2419 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
2420 *Pointer<Float4>(buffer, 16) = oC.x;
2421 }
2422
2423 {
2424 value = *Pointer<Float4>(buffer + 16, 16);
2425
2426 if(rgbaWriteMask != 0x0000000F)
2427 {
2428 Float4 masked = value;
2429 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
2430 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
2431 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(masked));
2432 }
2433
2434 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskX1X) + xMask * 16, 16));
2435 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX1X) + xMask * 16, 16));
2436 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value));
2437 *Pointer<Float4>(buffer + 16, 16) = oC.y;
2438 }
2439
2440 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
2441
2442 {
2443 value = *Pointer<Float4>(buffer, 16);
2444
2445 if(rgbaWriteMask != 0x0000000F)
2446 {
2447 Float4 masked = value;
2448 oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
2449 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
2450 oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(masked));
2451 }
2452
2453 oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(constants + OFFSET(Constants,maskX2X) + xMask * 16, 16));
2454 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX2X) + xMask * 16, 16));
2455 oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(value));
2456 *Pointer<Float4>(buffer, 16) = oC.z;
2457 }
2458
2459 {
2460 value = *Pointer<Float4>(buffer + 16, 16);
2461
2462 if(rgbaWriteMask != 0x0000000F)
2463 {
2464 Float4 masked = value;
2465 oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
2466 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
2467 oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(masked));
2468 }
2469
2470 oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(constants + OFFSET(Constants,maskX3X) + xMask * 16, 16));
2471 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX3X) + xMask * 16, 16));
2472 oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(value));
2473 *Pointer<Float4>(buffer + 16, 16) = oC.w;
2474 }
2475 break;
Ben Clayton8ab40532019-05-10 16:23:13 +01002476 case VK_FORMAT_R16G16B16A16_SFLOAT:
2477 if((rgbaWriteMask & 0x0000000F) != 0x0)
2478 {
2479 buffer = cBuffer + 8 * x;
2480
2481 UInt4 rgbaMask;
2482 UInt4 value = *Pointer<UInt4>(buffer);
2483 UInt4 packedCol;
2484 packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.x.y))) << 16) | UInt(As<UShort>(Half(oC.x.x))), 0);
2485 packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.x.w))) << 16) | UInt(As<UShort>(Half(oC.x.z))), 1);
2486 packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.y.y))) << 16) | UInt(As<UShort>(Half(oC.y.x))), 2);
2487 packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.y.w))) << 16) | UInt(As<UShort>(Half(oC.y.z))), 3);
2488 UInt4 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ01X) + xMask * 16);
2489 if((rgbaWriteMask & 0xF) != 0xF)
2490 {
2491 UInt2 tmpMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0]));
2492 rgbaMask = UInt4(tmpMask, tmpMask);
2493 mergedMask &= rgbaMask;
2494 }
2495 *Pointer<UInt4>(buffer) = (packedCol & mergedMask) | (As<UInt4>(value) & ~mergedMask);
2496
2497 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2498
2499 value = *Pointer<UInt4>(buffer);
2500 packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.z.y))) << 16) | UInt(As<UShort>(Half(oC.z.x))), 0);
2501 packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.z.w))) << 16) | UInt(As<UShort>(Half(oC.z.z))), 1);
2502 packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.w.y))) << 16) | UInt(As<UShort>(Half(oC.w.x))), 2);
2503 packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.w.w))) << 16) | UInt(As<UShort>(Half(oC.w.z))), 3);
2504 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ23X) + xMask * 16);
2505 if((rgbaWriteMask & 0xF) != 0xF)
2506 {
2507 mergedMask &= rgbaMask;
2508 }
2509 *Pointer<UInt4>(buffer) = (packedCol & mergedMask) | (As<UInt4>(value) & ~mergedMask);
2510 }
2511 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05002512 case VK_FORMAT_R16G16B16A16_SINT:
2513 case VK_FORMAT_R16G16B16A16_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04002514 if((rgbaWriteMask & 0x0000000F) != 0x0)
2515 {
2516 buffer = cBuffer + 8 * x;
2517
2518 UInt4 rgbaMask;
2519 UShort8 value = *Pointer<UShort8>(buffer);
2520 UShort8 packedCol = UShort8(UShort4(As<Int4>(oC.x)), UShort4(As<Int4>(oC.y)));
2521 UInt4 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ01X) + xMask * 16);
2522 if((rgbaWriteMask & 0xF) != 0xF)
2523 {
2524 UInt2 tmpMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0]));
2525 rgbaMask = UInt4(tmpMask, tmpMask);
2526 mergedMask &= rgbaMask;
2527 }
2528 *Pointer<UInt4>(buffer) = (As<UInt4>(packedCol) & mergedMask) | (As<UInt4>(value) & ~mergedMask);
2529
2530 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2531
2532 value = *Pointer<UShort8>(buffer);
2533 packedCol = UShort8(UShort4(As<Int4>(oC.z)), UShort4(As<Int4>(oC.w)));
2534 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ23X) + xMask * 16);
2535 if((rgbaWriteMask & 0xF) != 0xF)
2536 {
2537 mergedMask &= rgbaMask;
2538 }
2539 *Pointer<UInt4>(buffer) = (As<UInt4>(packedCol) & mergedMask) | (As<UInt4>(value) & ~mergedMask);
2540 }
2541 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05002542 case VK_FORMAT_R8G8B8A8_SINT:
2543 case VK_FORMAT_R8G8B8A8_UINT:
Chris Forbes6407c1a2019-04-15 17:22:57 -07002544 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
2545 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
Nicolas Capens68a82382018-10-02 13:16:55 -04002546 if((rgbaWriteMask & 0x0000000F) != 0x0)
2547 {
2548 UInt2 value, packedCol, mergedMask;
2549
2550 buffer = cBuffer + 4 * x;
2551
Ben Clayton8ab40532019-05-10 16:23:13 +01002552 bool isSigned = targetFormat == VK_FORMAT_R8G8B8A8_SINT || targetFormat == VK_FORMAT_A8B8G8R8_SINT_PACK32;
Chris Forbes6407c1a2019-04-15 17:22:57 -07002553
2554 if(isSigned)
Nicolas Capens68a82382018-10-02 13:16:55 -04002555 {
2556 packedCol = As<UInt2>(PackSigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
2557 }
2558 else
2559 {
2560 packedCol = As<UInt2>(PackUnsigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
2561 }
2562 value = *Pointer<UInt2>(buffer, 16);
2563 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
2564 if(rgbaWriteMask != 0xF)
2565 {
2566 mergedMask &= *Pointer<UInt2>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0]));
2567 }
2568 *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (value & ~mergedMask);
2569
2570 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2571
Chris Forbes6407c1a2019-04-15 17:22:57 -07002572 if(isSigned)
Nicolas Capens68a82382018-10-02 13:16:55 -04002573 {
2574 packedCol = As<UInt2>(PackSigned(Short4(As<Int4>(oC.z)), Short4(As<Int4>(oC.w))));
2575 }
2576 else
2577 {
2578 packedCol = As<UInt2>(PackUnsigned(Short4(As<Int4>(oC.z)), Short4(As<Int4>(oC.w))));
2579 }
2580 value = *Pointer<UInt2>(buffer, 16);
2581 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
2582 if(rgbaWriteMask != 0xF)
2583 {
2584 mergedMask &= *Pointer<UInt2>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0]));
2585 }
2586 *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (value & ~mergedMask);
2587 }
2588 break;
Chris Forbesb0f37162019-05-03 07:25:58 -07002589 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
2590 if ((rgbaWriteMask & 0x0000000F) != 0x0)
2591 {
2592 Int2 mergedMask, packedCol, value;
2593 Int4 packed = ((As<Int4>(oC.w) & Int4(0x3)) << 30) |
2594 ((As<Int4>(oC.z) & Int4(0x3ff)) << 20) |
2595 ((As<Int4>(oC.y) & Int4(0x3ff)) << 10) |
2596 ((As<Int4>(oC.x) & Int4(0x3ff)));
2597
2598 buffer = cBuffer + 4 * x;
2599 value = *Pointer<Int2>(buffer, 16);
2600 mergedMask = *Pointer<Int2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
2601 if (rgbaWriteMask != 0xF)
2602 {
2603 mergedMask &= *Pointer<Int2>(constants + OFFSET(Constants, mask10Q[rgbaWriteMask][0]));
2604 }
2605 *Pointer<Int2>(buffer) = (As<Int2>(packed) & mergedMask) | (value & ~mergedMask);
2606
2607 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2608
2609 value = *Pointer<Int2>(buffer, 16);
2610 mergedMask = *Pointer<Int2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
2611 if (rgbaWriteMask != 0xF)
2612 {
2613 mergedMask &= *Pointer<Int2>(constants + OFFSET(Constants, mask10Q[rgbaWriteMask][0]));
2614 }
2615 *Pointer<Int2>(buffer) = (As<Int2>(Int4(packed.zwww)) & mergedMask) | (value & ~mergedMask);
2616 }
2617 break;
Nicolas Capens68a82382018-10-02 13:16:55 -04002618 default:
Ben Clayton8ab40532019-05-10 16:23:13 +01002619 UNIMPLEMENTED("VkFormat: %d", int(targetFormat));
Nicolas Capens68a82382018-10-02 13:16:55 -04002620 }
2621 }
2622
2623 UShort4 PixelRoutine::convertFixed16(Float4 &cf, bool saturate)
2624 {
2625 return UShort4(cf * Float4(0xFFFF), saturate);
2626 }
2627
2628 void PixelRoutine::sRGBtoLinear16_12_16(Vector4s &c)
2629 {
2630 Pointer<Byte> LUT = constants + OFFSET(Constants,sRGBtoLinear12_16);
2631
2632 c.x = As<UShort4>(c.x) >> 4;
2633 c.y = As<UShort4>(c.y) >> 4;
2634 c.z = As<UShort4>(c.z) >> 4;
2635
2636 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 0))), 0);
2637 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 1))), 1);
2638 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 2))), 2);
2639 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 3))), 3);
2640
2641 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 0))), 0);
2642 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 1))), 1);
2643 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 2))), 2);
2644 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 3))), 3);
2645
2646 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 0))), 0);
2647 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 1))), 1);
2648 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 2))), 2);
2649 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 3))), 3);
2650 }
2651
2652 void PixelRoutine::linearToSRGB16_12_16(Vector4s &c)
2653 {
2654 c.x = As<UShort4>(c.x) >> 4;
2655 c.y = As<UShort4>(c.y) >> 4;
2656 c.z = As<UShort4>(c.z) >> 4;
2657
2658 linearToSRGB12_16(c);
2659 }
2660
2661 void PixelRoutine::linearToSRGB12_16(Vector4s &c)
2662 {
2663 Pointer<Byte> LUT = constants + OFFSET(Constants,linearToSRGB12_16);
2664
2665 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 0))), 0);
2666 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 1))), 1);
2667 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 2))), 2);
2668 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 3))), 3);
2669
2670 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 0))), 0);
2671 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 1))), 1);
2672 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 2))), 2);
2673 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 3))), 3);
2674
2675 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 0))), 0);
2676 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 1))), 1);
2677 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 2))), 2);
2678 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 3))), 3);
2679 }
2680
2681 Float4 PixelRoutine::sRGBtoLinear(const Float4 &x) // Approximates x^2.2
2682 {
2683 Float4 linear = x * x;
2684 linear = linear * Float4(0.73f) + linear * x * Float4(0.27f);
2685
2686 return Min(Max(linear, Float4(0.0f)), Float4(1.0f));
2687 }
Nicolas Capens68a82382018-10-02 13:16:55 -04002688}