blob: 24e9f6c9321f1835914ef0db1f969287b4b54999 [file] [log] [blame]
Nicolas Capens68a82382018-10-02 13:16:55 -04001// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include "SamplerCore.hpp"
16
Nicolas Capens125dba02019-04-24 02:03:22 -040017#include "PixelRoutine.hpp"
Nicolas Capens68a82382018-10-02 13:16:55 -040018#include "Constants.hpp"
Nicolas Capens97da7822019-04-30 17:33:26 -040019#include "Vulkan/VkSampler.hpp"
Chris Forbesebe5f7f2019-01-16 10:38:34 -080020#include "Vulkan/VkDebug.hpp"
Nicolas Capens68a82382018-10-02 13:16:55 -040021
22namespace
23{
Chris Forbes1d1720a2019-04-25 08:43:17 -070024 void applySwizzle(VkComponentSwizzle swizzle, sw::Float4& f, const sw::Vector4f& c, bool integer)
Nicolas Capens68a82382018-10-02 13:16:55 -040025 {
26 switch(swizzle)
27 {
Nicolas Capensa195abb2019-04-25 17:15:56 -040028 case VK_COMPONENT_SWIZZLE_R: f = c.x; break;
29 case VK_COMPONENT_SWIZZLE_G: f = c.y; break;
30 case VK_COMPONENT_SWIZZLE_B: f = c.z; break;
31 case VK_COMPONENT_SWIZZLE_A: f = c.w; break;
32 case VK_COMPONENT_SWIZZLE_ZERO: f = sw::Float4(0.0f, 0.0f, 0.0f, 0.0f); break;
Chris Forbes1d1720a2019-04-25 08:43:17 -070033 case VK_COMPONENT_SWIZZLE_ONE:
34 if (integer)
35 {
Nicolas Capensa195abb2019-04-25 17:15:56 -040036 f = rr::As<sw::Float4>(sw::Int4(1, 1, 1, 1));
37 }
38 else
Chris Forbes1d1720a2019-04-25 08:43:17 -070039 {
40 f = sw::Float4(1.0f, 1.0f, 1.0f, 1.0f);
41 }
42 break;
Nicolas Capens68a82382018-10-02 13:16:55 -040043 default: ASSERT(false);
44 }
45 }
46}
47
48namespace sw
49{
Nicolas Capens1e7120e2019-04-30 17:33:26 -040050 SamplerCore::SamplerCore(Pointer<Byte> &constants, const Sampler &state) : constants(constants), state(state)
Nicolas Capens68a82382018-10-02 13:16:55 -040051 {
52 }
53
Nicolas Capens97da7822019-04-30 17:33:26 -040054 Vector4f SamplerCore::sampleTexture(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Float4 &lodOrBias, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function)
Nicolas Capens68a82382018-10-02 13:16:55 -040055 {
Nicolas Capensb5d5f472019-04-24 00:05:42 -040056 Vector4f c;
Nicolas Capens68a82382018-10-02 13:16:55 -040057
58 #if PERF_PROFILE
59 AddAtomic(Pointer<Long>(&profiler.texOperations), 4);
60
61 if(state.compressedFormat)
62 {
63 AddAtomic(Pointer<Long>(&profiler.compressedTex), 4);
64 }
65 #endif
66
Nicolas Capens2d548402019-04-16 10:41:01 -040067 Float4 uuuu = u;
68 Float4 vvvv = v;
69 Float4 wwww = w;
70 Float4 qqqq = q;
Nicolas Capens68a82382018-10-02 13:16:55 -040071
Nicolas Capens2d548402019-04-16 10:41:01 -040072 Int face[4];
73 Float lod;
74 Float anisotropy;
75 Float4 uDelta;
76 Float4 vDelta;
Nicolas Capensb1670ed2019-05-02 00:14:17 -040077 Float4 M; // Major axis
Nicolas Capens2d548402019-04-16 10:41:01 -040078
Nicolas Capensb1670ed2019-05-02 00:14:17 -040079 if(state.textureType == TEXTURE_CUBE)
Nicolas Capens2d548402019-04-16 10:41:01 -040080 {
Nicolas Capensb1670ed2019-05-02 00:14:17 -040081 cubeFace(face, uuuu, vvvv, u, v, w, M);
82 }
83
84 if(function == Implicit || function == Bias || function == Grad)
85 {
86 if(state.textureType != TEXTURE_3D)
Nicolas Capens68a82382018-10-02 13:16:55 -040087 {
Nicolas Capensb1670ed2019-05-02 00:14:17 -040088 if(state.textureType != TEXTURE_CUBE)
89 {
90 computeLod(texture, sampler, lod, anisotropy, uDelta, vDelta, uuuu, vvvv, lodOrBias.x, dsx, dsy, function);
91 }
92 else
93 {
94 computeLodCube(texture, sampler, lod, u, v, w, lodOrBias.x, dsx, dsy, M, function);
95 }
Nicolas Capens68a82382018-10-02 13:16:55 -040096 }
97 else
98 {
Nicolas Capensb1670ed2019-05-02 00:14:17 -040099 computeLod3D(texture, sampler, lod, uuuu, vvvv, wwww, lodOrBias.x, dsx, dsy, function);
100 }
101
102 if(function == Bias)
103 {
104 lod += lodOrBias.x;
Nicolas Capens68a82382018-10-02 13:16:55 -0400105 }
106 }
Nicolas Capensb1670ed2019-05-02 00:14:17 -0400107 else if(function == Lod)
Nicolas Capens68a82382018-10-02 13:16:55 -0400108 {
Nicolas Capensb1670ed2019-05-02 00:14:17 -0400109 lod = lodOrBias.x;
110 }
111 else if(function == Fetch)
112 {
113 // TODO: Eliminate int-float-int conversion.
114 lod = Float(As<Int>(Float(lodOrBias.x)));
115 }
116 else if(function == Base)
117 {
118 lod = Float(0);
119 }
120 else UNREACHABLE("Sampler function %d", int(function));
121
122 if(function != Base)
123 {
Nicolas Capens58edd472019-05-02 00:14:17 -0400124 lod += *Pointer<Float>(sampler + OFFSET(vk::Sampler, mipLodBias));
Nicolas Capensb1670ed2019-05-02 00:14:17 -0400125 lod = Max(lod, *Pointer<Float>(sampler + OFFSET(vk::Sampler, minLod)));
126 lod = Min(lod, *Pointer<Float>(sampler + OFFSET(vk::Sampler, maxLod)));
Nicolas Capens2d548402019-04-16 10:41:01 -0400127 }
128
Nicolas Capens60a6a2e2019-05-06 15:09:44 -0400129 bool force32BitFiltering = state.highPrecisionFiltering && !hasYuvFormat() && (state.textureFilter != FILTER_POINT);
Nicolas Capens2d548402019-04-16 10:41:01 -0400130 bool seamlessCube = (state.addressingModeU == ADDRESSING_SEAMLESS);
131 bool rectangleTexture = (state.textureType == TEXTURE_RECTANGLE);
Nicolas Capens60a6a2e2019-05-06 15:09:44 -0400132 bool use32BitFiltering = hasFloatTexture() || hasUnnormalizedIntegerTexture() || force32BitFiltering ||
133 seamlessCube || rectangleTexture || state.compareEnable || borderModeActive();
134
135 if(use32BitFiltering)
Nicolas Capens2d548402019-04-16 10:41:01 -0400136 {
Nicolas Capens2d548402019-04-16 10:41:01 -0400137 c = sampleFloatFilter(texture, uuuu, vvvv, wwww, qqqq, offset, lod, anisotropy, uDelta, vDelta, face, function);
Nicolas Capens68a82382018-10-02 13:16:55 -0400138
Chris Forbesc71c17f2019-05-04 10:01:04 -0700139 if(!hasFloatTexture() && !hasUnnormalizedIntegerTexture() && !state.compareEnable)
Nicolas Capens68a82382018-10-02 13:16:55 -0400140 {
Nicolas Capens2d548402019-04-16 10:41:01 -0400141 if(has16bitTextureFormat())
Nicolas Capens68a82382018-10-02 13:16:55 -0400142 {
Nicolas Capens68a82382018-10-02 13:16:55 -0400143 switch(state.textureFormat)
144 {
Alexis Hetudd152e12018-11-14 13:39:28 -0500145 case VK_FORMAT_R5G6B5_UNORM_PACK16:
Nicolas Capens2d548402019-04-16 10:41:01 -0400146 c.x *= Float4(1.0f / 0xF800);
147 c.y *= Float4(1.0f / 0xFC00);
148 c.z *= Float4(1.0f / 0xF800);
Nicolas Capens68a82382018-10-02 13:16:55 -0400149 break;
Chris Forbes20aab4e2019-05-01 16:55:32 -0700150 case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
151 c.x *= Float4(1.0f / 0xF000);
152 c.y *= Float4(1.0f / 0xF000);
153 c.z *= Float4(1.0f / 0xF000);
154 c.w *= Float4(1.0f / 0xF000);
155 break;
Chris Forbesaddcdcc2019-05-02 09:42:00 -0700156 case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
157 c.x *= Float4(1.0f / 0xF800);
158 c.y *= Float4(1.0f / 0xF800);
159 c.z *= Float4(1.0f / 0xF800);
160 c.w *= Float4(1.0f / 0x8000);
161 break;
Nicolas Capens68a82382018-10-02 13:16:55 -0400162 default:
163 ASSERT(false);
164 }
165 }
Nicolas Capens68a82382018-10-02 13:16:55 -0400166 else
167 {
168 for(int component = 0; component < textureComponentCount(); component++)
169 {
Nicolas Capens2d548402019-04-16 10:41:01 -0400170 c[component] *= Float4(hasUnsignedTextureComponent(component) ? 1.0f / 0xFFFF : 1.0f / 0x7FFF);
Nicolas Capens68a82382018-10-02 13:16:55 -0400171 }
172 }
173 }
Nicolas Capens2d548402019-04-16 10:41:01 -0400174 }
Nicolas Capens60a6a2e2019-05-06 15:09:44 -0400175 else // 16-bit filtering.
Nicolas Capens2d548402019-04-16 10:41:01 -0400176 {
Nicolas Capensb5d5f472019-04-24 00:05:42 -0400177 Vector4s cs = sampleFilter(texture, uuuu, vvvv, wwww, offset, lod, anisotropy, uDelta, vDelta, face, function);
Nicolas Capens68a82382018-10-02 13:16:55 -0400178
Nicolas Capensa195abb2019-04-25 17:15:56 -0400179 if(state.textureFormat == VK_FORMAT_R5G6B5_UNORM_PACK16)
Nicolas Capens68a82382018-10-02 13:16:55 -0400180 {
Nicolas Capens2d548402019-04-16 10:41:01 -0400181 c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF800);
182 c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xFC00);
183 c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF800);
184 }
Chris Forbes20aab4e2019-05-01 16:55:32 -0700185 else if (state.textureFormat == VK_FORMAT_B4G4R4A4_UNORM_PACK16)
186 {
187 c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF000);
188 c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xF000);
189 c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF000);
190 c.w = Float4(As<UShort4>(cs.w)) * Float4(1.0f / 0xF000);
191 }
Chris Forbesaddcdcc2019-05-02 09:42:00 -0700192 else if (state.textureFormat == VK_FORMAT_A1R5G5B5_UNORM_PACK16)
193 {
194 c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF800);
195 c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xF800);
196 c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF800);
197 c.w = Float4(As<UShort4>(cs.w)) * Float4(1.0f / 0x8000);
198 }
Nicolas Capens2d548402019-04-16 10:41:01 -0400199 else
200 {
201 for(int component = 0; component < textureComponentCount(); component++)
Nicolas Capens68a82382018-10-02 13:16:55 -0400202 {
Nicolas Capens2d548402019-04-16 10:41:01 -0400203 if(hasUnsignedTextureComponent(component))
204 {
205 convertUnsigned16(c[component], cs[component]);
206 }
207 else
208 {
209 convertSigned15(c[component], cs[component]);
210 }
Nicolas Capens68a82382018-10-02 13:16:55 -0400211 }
212 }
Nicolas Capens2d548402019-04-16 10:41:01 -0400213 }
Nicolas Capens68a82382018-10-02 13:16:55 -0400214
Chris Forbes3c2035312019-04-25 08:30:58 -0700215 if((state.swizzle.r != VK_COMPONENT_SWIZZLE_R) ||
216 (state.swizzle.g != VK_COMPONENT_SWIZZLE_G) ||
217 (state.swizzle.b != VK_COMPONENT_SWIZZLE_B) ||
218 (state.swizzle.a != VK_COMPONENT_SWIZZLE_A))
Nicolas Capens2d548402019-04-16 10:41:01 -0400219 {
220 const Vector4f col(c);
Chris Forbes1d1720a2019-04-25 08:43:17 -0700221 auto integer = hasUnnormalizedIntegerTexture();
222 applySwizzle(state.swizzle.r, c.x, col, integer);
223 applySwizzle(state.swizzle.g, c.y, col, integer);
224 applySwizzle(state.swizzle.b, c.z, col, integer);
225 applySwizzle(state.swizzle.a, c.w, col, integer);
Nicolas Capens2d548402019-04-16 10:41:01 -0400226 }
227
Nicolas Capens68a82382018-10-02 13:16:55 -0400228 return c;
229 }
230
Nicolas Capens68a82382018-10-02 13:16:55 -0400231 Short4 SamplerCore::offsetSample(Short4 &uvw, Pointer<Byte> &mipmap, int halfOffset, bool wrap, int count, Float &lod)
232 {
233 Short4 offset = *Pointer<Short4>(mipmap + halfOffset);
234
235 if(state.textureFilter == FILTER_MIN_LINEAR_MAG_POINT)
236 {
237 offset &= Short4(CmpNLE(Float4(lod), Float4(0.0f)));
238 }
239 else if(state.textureFilter == FILTER_MIN_POINT_MAG_LINEAR)
240 {
241 offset &= Short4(CmpLE(Float4(lod), Float4(0.0f)));
242 }
243
244 if(wrap)
245 {
246 switch(count)
247 {
248 case -1: return uvw - offset;
249 case 0: return uvw;
250 case +1: return uvw + offset;
251 case 2: return uvw + offset + offset;
252 }
253 }
254 else // Clamp or mirror
255 {
256 switch(count)
257 {
258 case -1: return SubSat(As<UShort4>(uvw), As<UShort4>(offset));
259 case 0: return uvw;
260 case +1: return AddSat(As<UShort4>(uvw), As<UShort4>(offset));
261 case 2: return AddSat(AddSat(As<UShort4>(uvw), As<UShort4>(offset)), As<UShort4>(offset));
262 }
263 }
264
265 return uvw;
266 }
267
268 Vector4s SamplerCore::sampleFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], SamplerFunction function)
269 {
270 Vector4s c = sampleAniso(texture, u, v, w, offset, lod, anisotropy, uDelta, vDelta, face, false, function);
271
272 if(function == Fetch)
273 {
274 return c;
275 }
276
277 if(state.mipmapFilter == MIPMAP_LINEAR)
278 {
279 Vector4s cc = sampleAniso(texture, u, v, w, offset, lod, anisotropy, uDelta, vDelta, face, true, function);
280
281 lod *= Float(1 << 16);
282
283 UShort4 utri = UShort4(Float4(lod)); // FIXME: Optimize
284 Short4 stri = utri >> 1; // FIXME: Optimize
285
286 if(hasUnsignedTextureComponent(0)) cc.x = MulHigh(As<UShort4>(cc.x), utri); else cc.x = MulHigh(cc.x, stri);
287 if(hasUnsignedTextureComponent(1)) cc.y = MulHigh(As<UShort4>(cc.y), utri); else cc.y = MulHigh(cc.y, stri);
288 if(hasUnsignedTextureComponent(2)) cc.z = MulHigh(As<UShort4>(cc.z), utri); else cc.z = MulHigh(cc.z, stri);
289 if(hasUnsignedTextureComponent(3)) cc.w = MulHigh(As<UShort4>(cc.w), utri); else cc.w = MulHigh(cc.w, stri);
290
291 utri = ~utri;
292 stri = Short4(0x7FFF) - stri;
293
294 if(hasUnsignedTextureComponent(0)) c.x = MulHigh(As<UShort4>(c.x), utri); else c.x = MulHigh(c.x, stri);
295 if(hasUnsignedTextureComponent(1)) c.y = MulHigh(As<UShort4>(c.y), utri); else c.y = MulHigh(c.y, stri);
296 if(hasUnsignedTextureComponent(2)) c.z = MulHigh(As<UShort4>(c.z), utri); else c.z = MulHigh(c.z, stri);
297 if(hasUnsignedTextureComponent(3)) c.w = MulHigh(As<UShort4>(c.w), utri); else c.w = MulHigh(c.w, stri);
298
299 c.x += cc.x;
300 c.y += cc.y;
301 c.z += cc.z;
302 c.w += cc.w;
303
304 if(!hasUnsignedTextureComponent(0)) c.x += c.x;
305 if(!hasUnsignedTextureComponent(1)) c.y += c.y;
306 if(!hasUnsignedTextureComponent(2)) c.z += c.z;
307 if(!hasUnsignedTextureComponent(3)) c.w += c.w;
308 }
309
Nicolas Capens68a82382018-10-02 13:16:55 -0400310 return c;
311 }
312
313 Vector4s SamplerCore::sampleAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, SamplerFunction function)
314 {
315 Vector4s c;
316
317 if(state.textureFilter != FILTER_ANISOTROPIC || function == Lod || function == Fetch)
318 {
319 c = sampleQuad(texture, u, v, w, offset, lod, face, secondLOD, function);
320 }
321 else
322 {
323 Int a = RoundInt(anisotropy);
324
325 Vector4s cSum;
326
327 cSum.x = Short4(0);
328 cSum.y = Short4(0);
329 cSum.z = Short4(0);
330 cSum.w = Short4(0);
331
332 Float4 A = *Pointer<Float4>(constants + OFFSET(Constants,uvWeight) + 16 * a);
333 Float4 B = *Pointer<Float4>(constants + OFFSET(Constants,uvStart) + 16 * a);
334 UShort4 cw = *Pointer<UShort4>(constants + OFFSET(Constants,cWeight) + 8 * a);
335 Short4 sw = Short4(cw >> 1);
336
337 Float4 du = uDelta;
338 Float4 dv = vDelta;
339
340 Float4 u0 = u + B * du;
341 Float4 v0 = v + B * dv;
342
343 du *= A;
344 dv *= A;
345
346 Int i = 0;
347
348 Do
349 {
350 c = sampleQuad(texture, u0, v0, w, offset, lod, face, secondLOD, function);
351
352 u0 += du;
353 v0 += dv;
354
355 if(hasUnsignedTextureComponent(0)) cSum.x += As<Short4>(MulHigh(As<UShort4>(c.x), cw)); else cSum.x += MulHigh(c.x, sw);
356 if(hasUnsignedTextureComponent(1)) cSum.y += As<Short4>(MulHigh(As<UShort4>(c.y), cw)); else cSum.y += MulHigh(c.y, sw);
357 if(hasUnsignedTextureComponent(2)) cSum.z += As<Short4>(MulHigh(As<UShort4>(c.z), cw)); else cSum.z += MulHigh(c.z, sw);
358 if(hasUnsignedTextureComponent(3)) cSum.w += As<Short4>(MulHigh(As<UShort4>(c.w), cw)); else cSum.w += MulHigh(c.w, sw);
359
360 i++;
361 }
362 Until(i >= a)
363
364 if(hasUnsignedTextureComponent(0)) c.x = cSum.x; else c.x = AddSat(cSum.x, cSum.x);
365 if(hasUnsignedTextureComponent(1)) c.y = cSum.y; else c.y = AddSat(cSum.y, cSum.y);
366 if(hasUnsignedTextureComponent(2)) c.z = cSum.z; else c.z = AddSat(cSum.z, cSum.z);
367 if(hasUnsignedTextureComponent(3)) c.w = cSum.w; else c.w = AddSat(cSum.w, cSum.w);
368 }
369
370 return c;
371 }
372
373 Vector4s SamplerCore::sampleQuad(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function)
374 {
375 if(state.textureType != TEXTURE_3D)
376 {
377 return sampleQuad2D(texture, u, v, w, offset, lod, face, secondLOD, function);
378 }
379 else
380 {
381 return sample3D(texture, u, v, w, offset, lod, secondLOD, function);
382 }
383 }
384
385 Vector4s SamplerCore::sampleQuad2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function)
386 {
387 Vector4s c;
388
389 int componentCount = textureComponentCount();
390 bool gather = state.textureFilter == FILTER_GATHER;
391
392 Pointer<Byte> mipmap;
393 Pointer<Byte> buffer[4];
394
395 selectMipmap(texture, buffer, mipmap, lod, face, secondLOD);
396
397 bool texelFetch = (function == Fetch);
398
399 Short4 uuuu = texelFetch ? Short4(As<Int4>(u)) : address(u, state.addressingModeU, mipmap);
400 Short4 vvvv = texelFetch ? Short4(As<Int4>(v)) : address(v, state.addressingModeV, mipmap);
401 Short4 wwww = texelFetch ? Short4(As<Int4>(w)) : address(w, state.addressingModeW, mipmap);
402
403 if(state.textureFilter == FILTER_POINT || texelFetch)
404 {
405 c = sampleTexel(uuuu, vvvv, wwww, offset, mipmap, buffer, function);
406 }
407 else
408 {
409 Short4 uuuu0 = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, gather ? 0 : -1, lod);
410 Short4 vvvv0 = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, gather ? 0 : -1, lod);
411 Short4 uuuu1 = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, gather ? 2 : +1, lod);
412 Short4 vvvv1 = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, gather ? 2 : +1, lod);
413
414 Vector4s c0 = sampleTexel(uuuu0, vvvv0, wwww, offset, mipmap, buffer, function);
415 Vector4s c1 = sampleTexel(uuuu1, vvvv0, wwww, offset, mipmap, buffer, function);
416 Vector4s c2 = sampleTexel(uuuu0, vvvv1, wwww, offset, mipmap, buffer, function);
417 Vector4s c3 = sampleTexel(uuuu1, vvvv1, wwww, offset, mipmap, buffer, function);
418
419 if(!gather) // Blend
420 {
421 // Fractions
422 UShort4 f0u = As<UShort4>(uuuu0) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,width));
423 UShort4 f0v = As<UShort4>(vvvv0) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,height));
424
425 UShort4 f1u = ~f0u;
426 UShort4 f1v = ~f0v;
427
428 UShort4 f0u0v = MulHigh(f0u, f0v);
429 UShort4 f1u0v = MulHigh(f1u, f0v);
430 UShort4 f0u1v = MulHigh(f0u, f1v);
431 UShort4 f1u1v = MulHigh(f1u, f1v);
432
433 // Signed fractions
434 Short4 f1u1vs;
435 Short4 f0u1vs;
436 Short4 f1u0vs;
437 Short4 f0u0vs;
438
439 if(!hasUnsignedTextureComponent(0) || !hasUnsignedTextureComponent(1) || !hasUnsignedTextureComponent(2) || !hasUnsignedTextureComponent(3))
440 {
441 f1u1vs = f1u1v >> 1;
442 f0u1vs = f0u1v >> 1;
443 f1u0vs = f1u0v >> 1;
444 f0u0vs = f0u0v >> 1;
445 }
446
447 // Bilinear interpolation
448 if(componentCount >= 1)
449 {
450 if(has16bitTextureComponents() && hasUnsignedTextureComponent(0))
451 {
452 c0.x = As<UShort4>(c0.x) - MulHigh(As<UShort4>(c0.x), f0u) + MulHigh(As<UShort4>(c1.x), f0u);
453 c2.x = As<UShort4>(c2.x) - MulHigh(As<UShort4>(c2.x), f0u) + MulHigh(As<UShort4>(c3.x), f0u);
454 c.x = As<UShort4>(c0.x) - MulHigh(As<UShort4>(c0.x), f0v) + MulHigh(As<UShort4>(c2.x), f0v);
455 }
456 else
457 {
458 if(hasUnsignedTextureComponent(0))
459 {
460 c0.x = MulHigh(As<UShort4>(c0.x), f1u1v);
461 c1.x = MulHigh(As<UShort4>(c1.x), f0u1v);
462 c2.x = MulHigh(As<UShort4>(c2.x), f1u0v);
463 c3.x = MulHigh(As<UShort4>(c3.x), f0u0v);
464 }
465 else
466 {
467 c0.x = MulHigh(c0.x, f1u1vs);
468 c1.x = MulHigh(c1.x, f0u1vs);
469 c2.x = MulHigh(c2.x, f1u0vs);
470 c3.x = MulHigh(c3.x, f0u0vs);
471 }
472
473 c.x = (c0.x + c1.x) + (c2.x + c3.x);
474 if(!hasUnsignedTextureComponent(0)) c.x = AddSat(c.x, c.x); // Correct for signed fractions
475 }
476 }
477
478 if(componentCount >= 2)
479 {
480 if(has16bitTextureComponents() && hasUnsignedTextureComponent(1))
481 {
482 c0.y = As<UShort4>(c0.y) - MulHigh(As<UShort4>(c0.y), f0u) + MulHigh(As<UShort4>(c1.y), f0u);
483 c2.y = As<UShort4>(c2.y) - MulHigh(As<UShort4>(c2.y), f0u) + MulHigh(As<UShort4>(c3.y), f0u);
484 c.y = As<UShort4>(c0.y) - MulHigh(As<UShort4>(c0.y), f0v) + MulHigh(As<UShort4>(c2.y), f0v);
485 }
486 else
487 {
488 if(hasUnsignedTextureComponent(1))
489 {
490 c0.y = MulHigh(As<UShort4>(c0.y), f1u1v);
491 c1.y = MulHigh(As<UShort4>(c1.y), f0u1v);
492 c2.y = MulHigh(As<UShort4>(c2.y), f1u0v);
493 c3.y = MulHigh(As<UShort4>(c3.y), f0u0v);
494 }
495 else
496 {
497 c0.y = MulHigh(c0.y, f1u1vs);
498 c1.y = MulHigh(c1.y, f0u1vs);
499 c2.y = MulHigh(c2.y, f1u0vs);
500 c3.y = MulHigh(c3.y, f0u0vs);
501 }
502
503 c.y = (c0.y + c1.y) + (c2.y + c3.y);
504 if(!hasUnsignedTextureComponent(1)) c.y = AddSat(c.y, c.y); // Correct for signed fractions
505 }
506 }
507
508 if(componentCount >= 3)
509 {
510 if(has16bitTextureComponents() && hasUnsignedTextureComponent(2))
511 {
512 c0.z = As<UShort4>(c0.z) - MulHigh(As<UShort4>(c0.z), f0u) + MulHigh(As<UShort4>(c1.z), f0u);
513 c2.z = As<UShort4>(c2.z) - MulHigh(As<UShort4>(c2.z), f0u) + MulHigh(As<UShort4>(c3.z), f0u);
514 c.z = As<UShort4>(c0.z) - MulHigh(As<UShort4>(c0.z), f0v) + MulHigh(As<UShort4>(c2.z), f0v);
515 }
516 else
517 {
518 if(hasUnsignedTextureComponent(2))
519 {
520 c0.z = MulHigh(As<UShort4>(c0.z), f1u1v);
521 c1.z = MulHigh(As<UShort4>(c1.z), f0u1v);
522 c2.z = MulHigh(As<UShort4>(c2.z), f1u0v);
523 c3.z = MulHigh(As<UShort4>(c3.z), f0u0v);
524 }
525 else
526 {
527 c0.z = MulHigh(c0.z, f1u1vs);
528 c1.z = MulHigh(c1.z, f0u1vs);
529 c2.z = MulHigh(c2.z, f1u0vs);
530 c3.z = MulHigh(c3.z, f0u0vs);
531 }
532
533 c.z = (c0.z + c1.z) + (c2.z + c3.z);
534 if(!hasUnsignedTextureComponent(2)) c.z = AddSat(c.z, c.z); // Correct for signed fractions
535 }
536 }
537
538 if(componentCount >= 4)
539 {
540 if(has16bitTextureComponents() && hasUnsignedTextureComponent(3))
541 {
542 c0.w = As<UShort4>(c0.w) - MulHigh(As<UShort4>(c0.w), f0u) + MulHigh(As<UShort4>(c1.w), f0u);
543 c2.w = As<UShort4>(c2.w) - MulHigh(As<UShort4>(c2.w), f0u) + MulHigh(As<UShort4>(c3.w), f0u);
544 c.w = As<UShort4>(c0.w) - MulHigh(As<UShort4>(c0.w), f0v) + MulHigh(As<UShort4>(c2.w), f0v);
545 }
546 else
547 {
548 if(hasUnsignedTextureComponent(3))
549 {
550 c0.w = MulHigh(As<UShort4>(c0.w), f1u1v);
551 c1.w = MulHigh(As<UShort4>(c1.w), f0u1v);
552 c2.w = MulHigh(As<UShort4>(c2.w), f1u0v);
553 c3.w = MulHigh(As<UShort4>(c3.w), f0u0v);
554 }
555 else
556 {
557 c0.w = MulHigh(c0.w, f1u1vs);
558 c1.w = MulHigh(c1.w, f0u1vs);
559 c2.w = MulHigh(c2.w, f1u0vs);
560 c3.w = MulHigh(c3.w, f0u0vs);
561 }
562
563 c.w = (c0.w + c1.w) + (c2.w + c3.w);
564 if(!hasUnsignedTextureComponent(3)) c.w = AddSat(c.w, c.w); // Correct for signed fractions
565 }
566 }
567 }
568 else
569 {
570 c.x = c1.x;
571 c.y = c2.x;
572 c.z = c3.x;
573 c.w = c0.x;
574 }
575 }
576
577 return c;
578 }
579
580 Vector4s SamplerCore::sample3D(Pointer<Byte> &texture, Float4 &u_, Float4 &v_, Float4 &w_, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function)
581 {
582 Vector4s c_;
583
584 int componentCount = textureComponentCount();
585
586 Pointer<Byte> mipmap;
587 Pointer<Byte> buffer[4];
588 Int face[4];
589
590 selectMipmap(texture, buffer, mipmap, lod, face, secondLOD);
591
592 bool texelFetch = (function == Fetch);
593
594 Short4 uuuu = texelFetch ? Short4(As<Int4>(u_)) : address(u_, state.addressingModeU, mipmap);
595 Short4 vvvv = texelFetch ? Short4(As<Int4>(v_)) : address(v_, state.addressingModeV, mipmap);
596 Short4 wwww = texelFetch ? Short4(As<Int4>(w_)) : address(w_, state.addressingModeW, mipmap);
597
598 if(state.textureFilter == FILTER_POINT || texelFetch)
599 {
600 c_ = sampleTexel(uuuu, vvvv, wwww, offset, mipmap, buffer, function);
601 }
602 else
603 {
604 Vector4s c[2][2][2];
605
606 Short4 u[2][2][2];
607 Short4 v[2][2][2];
608 Short4 s[2][2][2];
609
610 for(int i = 0; i < 2; i++)
611 {
612 for(int j = 0; j < 2; j++)
613 {
614 for(int k = 0; k < 2; k++)
615 {
616 u[i][j][k] = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, i * 2 - 1, lod);
617 v[i][j][k] = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, j * 2 - 1, lod);
618 s[i][j][k] = offsetSample(wwww, mipmap, OFFSET(Mipmap,wHalf), state.addressingModeW == ADDRESSING_WRAP, k * 2 - 1, lod);
619 }
620 }
621 }
622
623 // Fractions
624 UShort4 f0u = As<UShort4>(u[0][0][0]) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,width));
625 UShort4 f0v = As<UShort4>(v[0][0][0]) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,height));
626 UShort4 f0s = As<UShort4>(s[0][0][0]) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,depth));
627
628 UShort4 f1u = ~f0u;
629 UShort4 f1v = ~f0v;
630 UShort4 f1s = ~f0s;
631
632 UShort4 f[2][2][2];
633 Short4 fs[2][2][2];
634
635 f[1][1][1] = MulHigh(f1u, f1v);
636 f[0][1][1] = MulHigh(f0u, f1v);
637 f[1][0][1] = MulHigh(f1u, f0v);
638 f[0][0][1] = MulHigh(f0u, f0v);
639 f[1][1][0] = MulHigh(f1u, f1v);
640 f[0][1][0] = MulHigh(f0u, f1v);
641 f[1][0][0] = MulHigh(f1u, f0v);
642 f[0][0][0] = MulHigh(f0u, f0v);
643
644 f[1][1][1] = MulHigh(f[1][1][1], f1s);
645 f[0][1][1] = MulHigh(f[0][1][1], f1s);
646 f[1][0][1] = MulHigh(f[1][0][1], f1s);
647 f[0][0][1] = MulHigh(f[0][0][1], f1s);
648 f[1][1][0] = MulHigh(f[1][1][0], f0s);
649 f[0][1][0] = MulHigh(f[0][1][0], f0s);
650 f[1][0][0] = MulHigh(f[1][0][0], f0s);
651 f[0][0][0] = MulHigh(f[0][0][0], f0s);
652
653 // Signed fractions
654 if(!hasUnsignedTextureComponent(0) || !hasUnsignedTextureComponent(1) || !hasUnsignedTextureComponent(2) || !hasUnsignedTextureComponent(3))
655 {
656 fs[0][0][0] = f[0][0][0] >> 1;
657 fs[0][0][1] = f[0][0][1] >> 1;
658 fs[0][1][0] = f[0][1][0] >> 1;
659 fs[0][1][1] = f[0][1][1] >> 1;
660 fs[1][0][0] = f[1][0][0] >> 1;
661 fs[1][0][1] = f[1][0][1] >> 1;
662 fs[1][1][0] = f[1][1][0] >> 1;
663 fs[1][1][1] = f[1][1][1] >> 1;
664 }
665
666 for(int i = 0; i < 2; i++)
667 {
668 for(int j = 0; j < 2; j++)
669 {
670 for(int k = 0; k < 2; k++)
671 {
672 c[i][j][k] = sampleTexel(u[i][j][k], v[i][j][k], s[i][j][k], offset, mipmap, buffer, function);
673
674 if(componentCount >= 1) { if(hasUnsignedTextureComponent(0)) c[i][j][k].x = MulHigh(As<UShort4>(c[i][j][k].x), f[1 - i][1 - j][1 - k]); else c[i][j][k].x = MulHigh(c[i][j][k].x, fs[1 - i][1 - j][1 - k]); }
675 if(componentCount >= 2) { if(hasUnsignedTextureComponent(1)) c[i][j][k].y = MulHigh(As<UShort4>(c[i][j][k].y), f[1 - i][1 - j][1 - k]); else c[i][j][k].y = MulHigh(c[i][j][k].y, fs[1 - i][1 - j][1 - k]); }
676 if(componentCount >= 3) { if(hasUnsignedTextureComponent(2)) c[i][j][k].z = MulHigh(As<UShort4>(c[i][j][k].z), f[1 - i][1 - j][1 - k]); else c[i][j][k].z = MulHigh(c[i][j][k].z, fs[1 - i][1 - j][1 - k]); }
677 if(componentCount >= 4) { if(hasUnsignedTextureComponent(3)) c[i][j][k].w = MulHigh(As<UShort4>(c[i][j][k].w), f[1 - i][1 - j][1 - k]); else c[i][j][k].w = MulHigh(c[i][j][k].w, fs[1 - i][1 - j][1 - k]); }
678
679 if(i != 0 || j != 0 || k != 0)
680 {
681 if(componentCount >= 1) c[0][0][0].x += c[i][j][k].x;
682 if(componentCount >= 2) c[0][0][0].y += c[i][j][k].y;
683 if(componentCount >= 3) c[0][0][0].z += c[i][j][k].z;
684 if(componentCount >= 4) c[0][0][0].w += c[i][j][k].w;
685 }
686 }
687 }
688 }
689
690 if(componentCount >= 1) c_.x = c[0][0][0].x;
691 if(componentCount >= 2) c_.y = c[0][0][0].y;
692 if(componentCount >= 3) c_.z = c[0][0][0].z;
693 if(componentCount >= 4) c_.w = c[0][0][0].w;
694
695 // Correct for signed fractions
696 if(componentCount >= 1) if(!hasUnsignedTextureComponent(0)) c_.x = AddSat(c_.x, c_.x);
697 if(componentCount >= 2) if(!hasUnsignedTextureComponent(1)) c_.y = AddSat(c_.y, c_.y);
698 if(componentCount >= 3) if(!hasUnsignedTextureComponent(2)) c_.z = AddSat(c_.z, c_.z);
699 if(componentCount >= 4) if(!hasUnsignedTextureComponent(3)) c_.w = AddSat(c_.w, c_.w);
700 }
701
702 return c_;
703 }
704
705 Vector4f SamplerCore::sampleFloatFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], SamplerFunction function)
706 {
707 Vector4f c = sampleFloatAniso(texture, u, v, w, q, offset, lod, anisotropy, uDelta, vDelta, face, false, function);
708
709 if(function == Fetch)
710 {
711 return c;
712 }
713
714 if(state.mipmapFilter == MIPMAP_LINEAR)
715 {
716 Vector4f cc = sampleFloatAniso(texture, u, v, w, q, offset, lod, anisotropy, uDelta, vDelta, face, true, function);
717
718 Float4 lod4 = Float4(Frac(lod));
719
720 c.x = (cc.x - c.x) * lod4 + c.x;
721 c.y = (cc.y - c.y) * lod4 + c.y;
722 c.z = (cc.z - c.z) * lod4 + c.z;
723 c.w = (cc.w - c.w) * lod4 + c.w;
724 }
725
Nicolas Capens68a82382018-10-02 13:16:55 -0400726 return c;
727 }
728
729 Vector4f SamplerCore::sampleFloatAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, SamplerFunction function)
730 {
731 Vector4f c;
732
733 if(state.textureFilter != FILTER_ANISOTROPIC || function == Lod || function == Fetch)
734 {
735 c = sampleFloat(texture, u, v, w, q, offset, lod, face, secondLOD, function);
736 }
737 else
738 {
739 Int a = RoundInt(anisotropy);
740
741 Vector4f cSum;
742
743 cSum.x = Float4(0.0f);
744 cSum.y = Float4(0.0f);
745 cSum.z = Float4(0.0f);
746 cSum.w = Float4(0.0f);
747
748 Float4 A = *Pointer<Float4>(constants + OFFSET(Constants,uvWeight) + 16 * a);
749 Float4 B = *Pointer<Float4>(constants + OFFSET(Constants,uvStart) + 16 * a);
750
751 Float4 du = uDelta;
752 Float4 dv = vDelta;
753
754 Float4 u0 = u + B * du;
755 Float4 v0 = v + B * dv;
756
757 du *= A;
758 dv *= A;
759
760 Int i = 0;
761
762 Do
763 {
764 c = sampleFloat(texture, u0, v0, w, q, offset, lod, face, secondLOD, function);
765
766 u0 += du;
767 v0 += dv;
768
769 cSum.x += c.x * A;
770 cSum.y += c.y * A;
771 cSum.z += c.z * A;
772 cSum.w += c.w * A;
773
774 i++;
775 }
776 Until(i >= a)
777
778 c.x = cSum.x;
779 c.y = cSum.y;
780 c.z = cSum.z;
781 c.w = cSum.w;
782 }
783
784 return c;
785 }
786
787 Vector4f SamplerCore::sampleFloat(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function)
788 {
789 if(state.textureType != TEXTURE_3D)
790 {
791 return sampleFloat2D(texture, u, v, w, q, offset, lod, face, secondLOD, function);
792 }
793 else
794 {
795 return sampleFloat3D(texture, u, v, w, offset, lod, secondLOD, function);
796 }
797 }
798
799 Vector4f SamplerCore::sampleFloat2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function)
800 {
801 Vector4f c;
802
803 int componentCount = textureComponentCount();
804 bool gather = state.textureFilter == FILTER_GATHER;
805
806 Pointer<Byte> mipmap;
807 Pointer<Byte> buffer[4];
808
809 selectMipmap(texture, buffer, mipmap, lod, face, secondLOD);
810
811 Int4 x0, x1, y0, y1, z0;
Nicolas Capens10f7bef2019-05-02 21:37:55 -0400812 Float4 fu, fv, fw;
Nicolas Capens68a82382018-10-02 13:16:55 -0400813 Int4 filter = computeFilterOffset(lod);
814 address(u, x0, x1, fu, mipmap, offset.x, filter, OFFSET(Mipmap, width), state.addressingModeU, function);
815 address(v, y0, y1, fv, mipmap, offset.y, filter, OFFSET(Mipmap, height), state.addressingModeV, function);
Nicolas Capens10f7bef2019-05-02 21:37:55 -0400816 address(w, z0, z0, fw, mipmap, offset.z, filter, OFFSET(Mipmap, depth), state.addressingModeW, function);
Nicolas Capens68a82382018-10-02 13:16:55 -0400817
818 Int4 pitchP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, pitchP), 16);
819 y0 *= pitchP;
820 if(hasThirdCoordinate())
821 {
822 Int4 sliceP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, sliceP), 16);
823 z0 *= sliceP;
824 }
825
826 if(state.textureFilter == FILTER_POINT || (function == Fetch))
827 {
828 c = sampleTexel(x0, y0, z0, q, mipmap, buffer, function);
829 }
830 else
831 {
832 y1 *= pitchP;
833
834 Vector4f c0 = sampleTexel(x0, y0, z0, q, mipmap, buffer, function);
835 Vector4f c1 = sampleTexel(x1, y0, z0, q, mipmap, buffer, function);
836 Vector4f c2 = sampleTexel(x0, y1, z0, q, mipmap, buffer, function);
837 Vector4f c3 = sampleTexel(x1, y1, z0, q, mipmap, buffer, function);
838
839 if(!gather) // Blend
840 {
841 if(componentCount >= 1) c0.x = c0.x + fu * (c1.x - c0.x);
842 if(componentCount >= 2) c0.y = c0.y + fu * (c1.y - c0.y);
843 if(componentCount >= 3) c0.z = c0.z + fu * (c1.z - c0.z);
844 if(componentCount >= 4) c0.w = c0.w + fu * (c1.w - c0.w);
845
846 if(componentCount >= 1) c2.x = c2.x + fu * (c3.x - c2.x);
847 if(componentCount >= 2) c2.y = c2.y + fu * (c3.y - c2.y);
848 if(componentCount >= 3) c2.z = c2.z + fu * (c3.z - c2.z);
849 if(componentCount >= 4) c2.w = c2.w + fu * (c3.w - c2.w);
850
851 if(componentCount >= 1) c.x = c0.x + fv * (c2.x - c0.x);
852 if(componentCount >= 2) c.y = c0.y + fv * (c2.y - c0.y);
853 if(componentCount >= 3) c.z = c0.z + fv * (c2.z - c0.z);
854 if(componentCount >= 4) c.w = c0.w + fv * (c2.w - c0.w);
855 }
856 else
857 {
858 c.x = c1.x;
859 c.y = c2.x;
860 c.z = c3.x;
861 c.w = c0.x;
862 }
863 }
864
865 return c;
866 }
867
868 Vector4f SamplerCore::sampleFloat3D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function)
869 {
870 Vector4f c;
871
872 int componentCount = textureComponentCount();
873
874 Pointer<Byte> mipmap;
875 Pointer<Byte> buffer[4];
876 Int face[4];
877
878 selectMipmap(texture, buffer, mipmap, lod, face, secondLOD);
879
880 Int4 x0, x1, y0, y1, z0, z1;
881 Float4 fu, fv, fw;
882 Int4 filter = computeFilterOffset(lod);
883 address(u, x0, x1, fu, mipmap, offset.x, filter, OFFSET(Mipmap, width), state.addressingModeU, function);
884 address(v, y0, y1, fv, mipmap, offset.y, filter, OFFSET(Mipmap, height), state.addressingModeV, function);
885 address(w, z0, z1, fw, mipmap, offset.z, filter, OFFSET(Mipmap, depth), state.addressingModeW, function);
886
887 Int4 pitchP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, pitchP), 16);
888 Int4 sliceP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, sliceP), 16);
889 y0 *= pitchP;
890 z0 *= sliceP;
891
892 if(state.textureFilter == FILTER_POINT || (function == Fetch))
893 {
894 c = sampleTexel(x0, y0, z0, w, mipmap, buffer, function);
895 }
896 else
897 {
898 y1 *= pitchP;
899 z1 *= sliceP;
900
901 Vector4f c0 = sampleTexel(x0, y0, z0, w, mipmap, buffer, function);
902 Vector4f c1 = sampleTexel(x1, y0, z0, w, mipmap, buffer, function);
903 Vector4f c2 = sampleTexel(x0, y1, z0, w, mipmap, buffer, function);
904 Vector4f c3 = sampleTexel(x1, y1, z0, w, mipmap, buffer, function);
905 Vector4f c4 = sampleTexel(x0, y0, z1, w, mipmap, buffer, function);
906 Vector4f c5 = sampleTexel(x1, y0, z1, w, mipmap, buffer, function);
907 Vector4f c6 = sampleTexel(x0, y1, z1, w, mipmap, buffer, function);
908 Vector4f c7 = sampleTexel(x1, y1, z1, w, mipmap, buffer, function);
909
910 // Blend first slice
911 if(componentCount >= 1) c0.x = c0.x + fu * (c1.x - c0.x);
912 if(componentCount >= 2) c0.y = c0.y + fu * (c1.y - c0.y);
913 if(componentCount >= 3) c0.z = c0.z + fu * (c1.z - c0.z);
914 if(componentCount >= 4) c0.w = c0.w + fu * (c1.w - c0.w);
915
916 if(componentCount >= 1) c2.x = c2.x + fu * (c3.x - c2.x);
917 if(componentCount >= 2) c2.y = c2.y + fu * (c3.y - c2.y);
918 if(componentCount >= 3) c2.z = c2.z + fu * (c3.z - c2.z);
919 if(componentCount >= 4) c2.w = c2.w + fu * (c3.w - c2.w);
920
921 if(componentCount >= 1) c0.x = c0.x + fv * (c2.x - c0.x);
922 if(componentCount >= 2) c0.y = c0.y + fv * (c2.y - c0.y);
923 if(componentCount >= 3) c0.z = c0.z + fv * (c2.z - c0.z);
924 if(componentCount >= 4) c0.w = c0.w + fv * (c2.w - c0.w);
925
926 // Blend second slice
927 if(componentCount >= 1) c4.x = c4.x + fu * (c5.x - c4.x);
928 if(componentCount >= 2) c4.y = c4.y + fu * (c5.y - c4.y);
929 if(componentCount >= 3) c4.z = c4.z + fu * (c5.z - c4.z);
930 if(componentCount >= 4) c4.w = c4.w + fu * (c5.w - c4.w);
931
932 if(componentCount >= 1) c6.x = c6.x + fu * (c7.x - c6.x);
933 if(componentCount >= 2) c6.y = c6.y + fu * (c7.y - c6.y);
934 if(componentCount >= 3) c6.z = c6.z + fu * (c7.z - c6.z);
935 if(componentCount >= 4) c6.w = c6.w + fu * (c7.w - c6.w);
936
937 if(componentCount >= 1) c4.x = c4.x + fv * (c6.x - c4.x);
938 if(componentCount >= 2) c4.y = c4.y + fv * (c6.y - c4.y);
939 if(componentCount >= 3) c4.z = c4.z + fv * (c6.z - c4.z);
940 if(componentCount >= 4) c4.w = c4.w + fv * (c6.w - c4.w);
941
942 // Blend slices
943 if(componentCount >= 1) c.x = c0.x + fw * (c4.x - c0.x);
944 if(componentCount >= 2) c.y = c0.y + fw * (c4.y - c0.y);
945 if(componentCount >= 3) c.z = c0.z + fw * (c4.z - c0.z);
946 if(componentCount >= 4) c.w = c0.w + fw * (c4.w - c0.w);
947 }
948
949 return c;
950 }
951
952 Float SamplerCore::log2sqrt(Float lod)
953 {
954 // log2(sqrt(lod)) // Equals 0.25 * log2(lod^2).
955 lod *= lod; // Squaring doubles the exponent and produces an extra bit of precision.
956 lod = Float(As<Int>(lod)) - Float(0x3F800000); // Interpret as integer and subtract the exponent bias.
957 lod *= As<Float>(Int(0x33000000)); // Scale by 0.25 * 2^-23 (mantissa length).
958
959 return lod;
960 }
961
962 Float SamplerCore::log2(Float lod)
963 {
964 lod *= lod; // Squaring doubles the exponent and produces an extra bit of precision.
965 lod = Float(As<Int>(lod)) - Float(0x3F800000); // Interpret as integer and subtract the exponent bias.
966 lod *= As<Float>(Int(0x33800000)); // Scale by 0.5 * 2^-23 (mantissa length).
967
968 return lod;
969 }
970
Nicolas Capens1e7120e2019-04-30 17:33:26 -0400971 void SamplerCore::computeLod(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Float4 &uuuu, Float4 &vvvv, const Float &lodOrBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function)
Nicolas Capens68a82382018-10-02 13:16:55 -0400972 {
Nicolas Capensb1670ed2019-05-02 00:14:17 -0400973 Float4 duvdxy;
974
975 if(function != Grad) // Implicit
Nicolas Capens68a82382018-10-02 13:16:55 -0400976 {
Nicolas Capensb1670ed2019-05-02 00:14:17 -0400977 duvdxy = Float4(uuuu.yz, vvvv.yz) - Float4(uuuu.xx, vvvv.xx);
Nicolas Capens68a82382018-10-02 13:16:55 -0400978 }
Nicolas Capensb1670ed2019-05-02 00:14:17 -0400979 else
Nicolas Capens68a82382018-10-02 13:16:55 -0400980 {
Nicolas Capensb1670ed2019-05-02 00:14:17 -0400981 Float4 dudxy = Float4(dsx.x.xx, dsy.x.xx);
982 Float4 dvdxy = Float4(dsx.y.xx, dsy.y.xx);
Nicolas Capens68a82382018-10-02 13:16:55 -0400983
Nicolas Capensb1670ed2019-05-02 00:14:17 -0400984 duvdxy = Float4(dudxy.xz, dvdxy.xz);
985 }
986
987 // Scale by texture dimensions.
988 Float4 dUVdxy = duvdxy * *Pointer<Float4>(texture + OFFSET(Texture, widthWidthHeightHeight));
989
990 Float4 dUV2dxy = dUVdxy * dUVdxy;
991 Float4 dUV2 = dUV2dxy.xy + dUV2dxy.zw;
992
993 lod = Max(Float(dUV2.x), Float(dUV2.y)); // Square length of major axis
994
995 if(state.textureFilter == FILTER_ANISOTROPIC)
996 {
997 Float det = Abs(Float(dUVdxy.x) * Float(dUVdxy.w) - Float(dUVdxy.y) * Float(dUVdxy.z));
998
999 Float4 dudx = duvdxy.xxxx;
1000 Float4 dudy = duvdxy.yyyy;
1001 Float4 dvdx = duvdxy.zzzz;
1002 Float4 dvdy = duvdxy.wwww;
1003
1004 Int4 mask = As<Int4>(CmpNLT(dUV2.x, dUV2.y));
1005 uDelta = As<Float4>((As<Int4>(dudx) & mask) | ((As<Int4>(dudy) & ~mask)));
1006 vDelta = As<Float4>((As<Int4>(dvdx) & mask) | ((As<Int4>(dvdy) & ~mask)));
1007
1008 anisotropy = lod * Rcp_pp(det);
1009 anisotropy = Min(anisotropy, *Pointer<Float>(sampler + OFFSET(vk::Sampler,maxAnisotropy)));
1010
1011 lod *= Rcp_pp(anisotropy * anisotropy);
1012 }
1013
1014 lod = log2sqrt(lod); // log2(sqrt(lod))
Nicolas Capens68a82382018-10-02 13:16:55 -04001015 }
1016
Nicolas Capens1e7120e2019-04-30 17:33:26 -04001017 void SamplerCore::computeLodCube(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float4 &u, Float4 &v, Float4 &w, const Float &lodOrBias, Vector4f &dsx, Vector4f &dsy, Float4 &M, SamplerFunction function)
Nicolas Capens68a82382018-10-02 13:16:55 -04001018 {
Nicolas Capensb1670ed2019-05-02 00:14:17 -04001019 Float4 dudxy, dvdxy, dsdxy;
1020
1021 if(function != Grad) // Implicit
Nicolas Capens68a82382018-10-02 13:16:55 -04001022 {
Nicolas Capensb1670ed2019-05-02 00:14:17 -04001023 Float4 U = u * M;
1024 Float4 V = v * M;
1025 Float4 W = w * M;
Nicolas Capens68a82382018-10-02 13:16:55 -04001026
Nicolas Capensb1670ed2019-05-02 00:14:17 -04001027 dudxy = Abs(U - U.xxxx);
1028 dvdxy = Abs(V - V.xxxx);
1029 dsdxy = Abs(W - W.xxxx);
Nicolas Capens68a82382018-10-02 13:16:55 -04001030 }
Nicolas Capensb1670ed2019-05-02 00:14:17 -04001031 else
Nicolas Capens68a82382018-10-02 13:16:55 -04001032 {
Nicolas Capensb1670ed2019-05-02 00:14:17 -04001033 dudxy = Float4(dsx.x.xx, dsy.x.xx);
1034 dvdxy = Float4(dsx.y.xx, dsy.y.xx);
1035 dsdxy = Float4(dsx.z.xx, dsy.z.xx);
Nicolas Capens68a82382018-10-02 13:16:55 -04001036
Nicolas Capensb1670ed2019-05-02 00:14:17 -04001037 dudxy = Abs(dudxy * Float4(M.x));
1038 dvdxy = Abs(dvdxy * Float4(M.x));
1039 dsdxy = Abs(dsdxy * Float4(M.x));
1040 }
1041
1042 // Compute the largest Manhattan distance in two dimensions.
1043 // This takes the footprint across adjacent faces into account.
1044 Float4 duvdxy = dudxy + dvdxy;
1045 Float4 dusdxy = dudxy + dsdxy;
1046 Float4 dvsdxy = dvdxy + dsdxy;
1047
1048 dudxy = Max(Max(duvdxy, dusdxy), dvsdxy);
1049
1050 lod = Max(Float(dudxy.y), Float(dudxy.z)); // FIXME: Max(dudxy.y, dudxy.z);
1051
1052 // Scale by texture dimension.
1053 lod *= *Pointer<Float>(texture + OFFSET(Texture,width));
1054
1055 lod = log2(lod);
Nicolas Capens68a82382018-10-02 13:16:55 -04001056 }
1057
Nicolas Capens1e7120e2019-04-30 17:33:26 -04001058 void SamplerCore::computeLod3D(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float4 &uuuu, Float4 &vvvv, Float4 &wwww, const Float &lodOrBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function)
Nicolas Capens68a82382018-10-02 13:16:55 -04001059 {
Nicolas Capensb1670ed2019-05-02 00:14:17 -04001060 Float4 dudxy, dvdxy, dsdxy;
1061
1062 if(function != Grad) // Implicit
Nicolas Capens68a82382018-10-02 13:16:55 -04001063 {
Nicolas Capensb1670ed2019-05-02 00:14:17 -04001064 dudxy = uuuu - uuuu.xxxx;
1065 dvdxy = vvvv - vvvv.xxxx;
1066 dsdxy = wwww - wwww.xxxx;
Nicolas Capens68a82382018-10-02 13:16:55 -04001067 }
Nicolas Capensb1670ed2019-05-02 00:14:17 -04001068 else
Nicolas Capens68a82382018-10-02 13:16:55 -04001069 {
Nicolas Capensb1670ed2019-05-02 00:14:17 -04001070 dudxy = Float4(dsx.x.xx, dsy.x.xx);
1071 dvdxy = Float4(dsx.y.xx, dsy.y.xx);
1072 dsdxy = Float4(dsx.z.xx, dsy.z.xx);
Nicolas Capens68a82382018-10-02 13:16:55 -04001073 }
Nicolas Capens68a82382018-10-02 13:16:55 -04001074
Nicolas Capensb1670ed2019-05-02 00:14:17 -04001075 // Scale by texture dimensions.
1076 dudxy *= *Pointer<Float4>(texture + OFFSET(Texture, width));
1077 dvdxy *= *Pointer<Float4>(texture + OFFSET(Texture, height));
1078 dsdxy *= *Pointer<Float4>(texture + OFFSET(Texture, depth));
1079
1080 dudxy *= dudxy;
1081 dvdxy *= dvdxy;
1082 dsdxy *= dsdxy;
1083
1084 dudxy += dvdxy;
1085 dudxy += dsdxy;
1086
1087 lod = Max(Float(dudxy.y), Float(dudxy.z)); // FIXME: Max(dudxy.y, dudxy.z);
1088
1089 lod = log2sqrt(lod); // log2(sqrt(lod))
Nicolas Capens68a82382018-10-02 13:16:55 -04001090 }
1091
1092 void SamplerCore::cubeFace(Int face[4], Float4 &U, Float4 &V, Float4 &x, Float4 &y, Float4 &z, Float4 &M)
1093 {
1094 Int4 xn = CmpLT(x, Float4(0.0f)); // x < 0
1095 Int4 yn = CmpLT(y, Float4(0.0f)); // y < 0
1096 Int4 zn = CmpLT(z, Float4(0.0f)); // z < 0
1097
1098 Float4 absX = Abs(x);
1099 Float4 absY = Abs(y);
1100 Float4 absZ = Abs(z);
1101
1102 Int4 xy = CmpNLE(absX, absY); // abs(x) > abs(y)
1103 Int4 yz = CmpNLE(absY, absZ); // abs(y) > abs(z)
1104 Int4 zx = CmpNLE(absZ, absX); // abs(z) > abs(x)
1105 Int4 xMajor = xy & ~zx; // abs(x) > abs(y) && abs(x) > abs(z)
1106 Int4 yMajor = yz & ~xy; // abs(y) > abs(z) && abs(y) > abs(x)
1107 Int4 zMajor = zx & ~yz; // abs(z) > abs(x) && abs(z) > abs(y)
1108
1109 // FACE_POSITIVE_X = 000b
1110 // FACE_NEGATIVE_X = 001b
1111 // FACE_POSITIVE_Y = 010b
1112 // FACE_NEGATIVE_Y = 011b
1113 // FACE_POSITIVE_Z = 100b
1114 // FACE_NEGATIVE_Z = 101b
1115
1116 Int yAxis = SignMask(yMajor);
1117 Int zAxis = SignMask(zMajor);
1118
1119 Int4 n = ((xn & xMajor) | (yn & yMajor) | (zn & zMajor)) & Int4(0x80000000);
1120 Int negative = SignMask(n);
1121
1122 face[0] = *Pointer<Int>(constants + OFFSET(Constants,transposeBit0) + negative * 4);
1123 face[0] |= *Pointer<Int>(constants + OFFSET(Constants,transposeBit1) + yAxis * 4);
1124 face[0] |= *Pointer<Int>(constants + OFFSET(Constants,transposeBit2) + zAxis * 4);
1125 face[1] = (face[0] >> 4) & 0x7;
1126 face[2] = (face[0] >> 8) & 0x7;
1127 face[3] = (face[0] >> 12) & 0x7;
1128 face[0] &= 0x7;
1129
1130 M = Max(Max(absX, absY), absZ);
1131
1132 // U = xMajor ? (neg ^ -z) : ((zMajor & neg) ^ x)
1133 U = As<Float4>((xMajor & (n ^ As<Int4>(-z))) | (~xMajor & ((zMajor & n) ^ As<Int4>(x))));
1134
1135 // V = !yMajor ? -y : (n ^ z)
1136 V = As<Float4>((~yMajor & As<Int4>(-y)) | (yMajor & (n ^ As<Int4>(z))));
1137
1138 M = reciprocal(M) * Float4(0.5f);
1139 U = U * M + Float4(0.5f);
1140 V = V * M + Float4(0.5f);
1141 }
1142
1143 Short4 SamplerCore::applyOffset(Short4 &uvw, Float4 &offset, const Int4 &whd, AddressingMode mode)
1144 {
1145 Int4 tmp = Int4(As<UShort4>(uvw));
1146 tmp = tmp + As<Int4>(offset);
1147
1148 switch(mode)
1149 {
1150 case AddressingMode::ADDRESSING_WRAP:
1151 tmp = (tmp + whd * Int4(-MIN_PROGRAM_TEXEL_OFFSET)) % whd;
1152 break;
1153 case AddressingMode::ADDRESSING_CLAMP:
1154 case AddressingMode::ADDRESSING_MIRROR:
1155 case AddressingMode::ADDRESSING_MIRRORONCE:
1156 case AddressingMode::ADDRESSING_BORDER: // FIXME: Implement and test ADDRESSING_MIRROR, ADDRESSING_MIRRORONCE, ADDRESSING_BORDER
1157 tmp = Min(Max(tmp, Int4(0)), whd - Int4(1));
1158 break;
1159 case ADDRESSING_TEXELFETCH:
1160 break;
1161 case AddressingMode::ADDRESSING_SEAMLESS:
1162 ASSERT(false); // Cube sampling doesn't support offset.
1163 default:
1164 ASSERT(false);
1165 }
1166
1167 return As<Short4>(UShort4(tmp));
1168 }
1169
1170 void SamplerCore::computeIndices(UInt index[4], Short4 uuuu, Short4 vvvv, Short4 wwww, Vector4f &offset, const Pointer<Byte> &mipmap, SamplerFunction function)
1171 {
1172 bool texelFetch = (function == Fetch);
1173 bool hasOffset = (function.option == Offset);
1174
1175 if(!texelFetch)
1176 {
1177 uuuu = MulHigh(As<UShort4>(uuuu), *Pointer<UShort4>(mipmap + OFFSET(Mipmap, width)));
1178 vvvv = MulHigh(As<UShort4>(vvvv), *Pointer<UShort4>(mipmap + OFFSET(Mipmap, height)));
1179 }
1180
1181 if(hasOffset)
1182 {
1183 UShort4 w = *Pointer<UShort4>(mipmap + OFFSET(Mipmap, width));
1184 uuuu = applyOffset(uuuu, offset.x, Int4(w), texelFetch ? ADDRESSING_TEXELFETCH : state.addressingModeU);
1185 UShort4 h = *Pointer<UShort4>(mipmap + OFFSET(Mipmap, height));
1186 vvvv = applyOffset(vvvv, offset.y, Int4(h), texelFetch ? ADDRESSING_TEXELFETCH : state.addressingModeV);
1187 }
1188
1189 Short4 uuu2 = uuuu;
1190 uuuu = As<Short4>(UnpackLow(uuuu, vvvv));
1191 uuu2 = As<Short4>(UnpackHigh(uuu2, vvvv));
1192 uuuu = As<Short4>(MulAdd(uuuu, *Pointer<Short4>(mipmap + OFFSET(Mipmap,onePitchP))));
1193 uuu2 = As<Short4>(MulAdd(uuu2, *Pointer<Short4>(mipmap + OFFSET(Mipmap,onePitchP))));
1194
1195 if(hasThirdCoordinate())
1196 {
Nicolas Capense2535df2019-05-06 10:37:50 -04001197 if(state.textureType == TEXTURE_3D)
Nicolas Capens68a82382018-10-02 13:16:55 -04001198 {
1199 if(!texelFetch)
1200 {
1201 wwww = MulHigh(As<UShort4>(wwww), *Pointer<UShort4>(mipmap + OFFSET(Mipmap, depth)));
1202 }
1203
1204 if(hasOffset)
1205 {
1206 UShort4 d = *Pointer<UShort4>(mipmap + OFFSET(Mipmap, depth));
1207 wwww = applyOffset(wwww, offset.z, Int4(d), texelFetch ? ADDRESSING_TEXELFETCH : state.addressingModeW);
1208 }
1209 }
1210
1211 UInt4 uv(As<UInt2>(uuuu), As<UInt2>(uuu2));
1212 uv += As<UInt4>(Int4(As<UShort4>(wwww))) * *Pointer<UInt4>(mipmap + OFFSET(Mipmap, sliceP));
1213
1214 index[0] = Extract(As<Int4>(uv), 0);
1215 index[1] = Extract(As<Int4>(uv), 1);
1216 index[2] = Extract(As<Int4>(uv), 2);
1217 index[3] = Extract(As<Int4>(uv), 3);
1218 }
1219 else
1220 {
1221 index[0] = Extract(As<Int2>(uuuu), 0);
1222 index[1] = Extract(As<Int2>(uuuu), 1);
1223 index[2] = Extract(As<Int2>(uuu2), 0);
1224 index[3] = Extract(As<Int2>(uuu2), 1);
1225 }
1226
1227 if(texelFetch)
1228 {
1229 Int size = Int(*Pointer<Int>(mipmap + OFFSET(Mipmap, sliceP)));
1230 if(hasThirdCoordinate())
1231 {
1232 size *= Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, depth)));
1233 }
1234 UInt min = 0;
1235 UInt max = size - 1;
1236
1237 for(int i = 0; i < 4; i++)
1238 {
1239 index[i] = Min(Max(index[i], min), max);
1240 }
1241 }
1242 }
1243
Nicolas Capens60a6a2e2019-05-06 15:09:44 -04001244 void SamplerCore::computeIndices(UInt index[4], Int4 uuuu, Int4 vvvv, Int4 wwww, Int4 valid, const Pointer<Byte> &mipmap, SamplerFunction function)
Nicolas Capens68a82382018-10-02 13:16:55 -04001245 {
1246 UInt4 indices = uuuu + vvvv;
1247
1248 if(hasThirdCoordinate())
1249 {
1250 indices += As<UInt4>(wwww);
1251 }
1252
Nicolas Capens60a6a2e2019-05-06 15:09:44 -04001253 if(borderModeActive())
1254 {
1255 // Texels out of range are still sampled before being replaced
1256 // with the border color, so sample them at linear index 0.
1257 indices &= As<UInt4>(valid);
1258 }
1259
Nicolas Capens68a82382018-10-02 13:16:55 -04001260 for(int i = 0; i < 4; i++)
1261 {
1262 index[i] = Extract(As<Int4>(indices), i);
1263 }
1264 }
1265
// Reads four texels (one per lane) at the given linear indices and unpacks
// them into 16-bit-per-component vectors: c.x holds the first component of
// all four texels, c.y the second, and so on.
//
// index  - linear texel index per lane.
// buffer - base pointers; lanes read from buffer[0..3] for cube textures and
//          all from buffer[0] otherwise.
//
// The unpacking depends on the texture format: packed 16-bit formats,
// 8-bit components, 16-bit components, and the two A2B10G10R10 formats are
// handled; anything else asserts. For sRGB formats the components for which
// isRGBComponent() holds are converted with sRGBtoLinear16_8_16().
1266 Vector4s SamplerCore::sampleTexel(UInt index[4], Pointer<Byte> buffer[4])
1267 {
1268 Vector4s c;
1269
// Buffer used by each lane. f0 is 0 for every texture type.
1270 int f0 = state.textureType == TEXTURE_CUBE ? 0 : 0;
1271 int f1 = state.textureType == TEXTURE_CUBE ? 1 : 0;
1272 int f2 = state.textureType == TEXTURE_CUBE ? 2 : 0;
1273 int f3 = state.textureType == TEXTURE_CUBE ? 3 : 0;
1274
1275 if(has16bitTextureFormat())
1276 {
// Load one packed 16-bit texel per lane, then move each bit field to the
// top of its own 16-bit component.
1277 c.x = Insert(c.x, Pointer<Short>(buffer[f0])[index[0]], 0);
1278 c.x = Insert(c.x, Pointer<Short>(buffer[f1])[index[1]], 1);
1279 c.x = Insert(c.x, Pointer<Short>(buffer[f2])[index[2]], 2);
1280 c.x = Insert(c.x, Pointer<Short>(buffer[f3])[index[3]], 3);
1281
1282 switch(state.textureFormat)
1283 {
Alexis Hetudd152e12018-11-14 13:39:28 -05001284 case VK_FORMAT_R5G6B5_UNORM_PACK16:
Nicolas Capens68a82382018-10-02 13:16:55 -04001285 c.z = (c.x & Short4(0x001Fu)) << 11;
1286 c.y = (c.x & Short4(0x07E0u)) << 5;
1287 c.x = (c.x & Short4(0xF800u));
1288 break;
Chris Forbes20aab4e2019-05-01 16:55:32 -07001289 case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
1290 c.w = (c.x << 12) & Short4(0xF000);
1291 c.z = (c.x) & Short4(0xF000);
1292 c.y = (c.x << 4) & Short4(0xF000);
1293 c.x = (c.x << 8) & Short4(0xF000);
1294 break;
Chris Forbesaddcdcc2019-05-02 09:42:00 -07001295 case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
1296 c.w = (c.x) & Short4(0x8000);
1297 c.z = (c.x << 11) & Short4(0xF800);
1298 c.y = (c.x << 6) & Short4(0xF800);
1299 c.x = (c.x << 1) & Short4(0xF800);
1300 break;
Nicolas Capens68a82382018-10-02 13:16:55 -04001301 default:
1302 ASSERT(false);
1303 }
1304 }
1305 else if(has8bitTextureComponents())
1306 {
1307 switch(textureComponentCount())
1308 {
1309 case 4:
1310 {
// Load four 32-bit texels, then transpose them so that each of c.x..c.w
// holds one component of all four texels.
1311 Byte4 c0 = Pointer<Byte4>(buffer[f0])[index[0]];
1312 Byte4 c1 = Pointer<Byte4>(buffer[f1])[index[1]];
1313 Byte4 c2 = Pointer<Byte4>(buffer[f2])[index[2]];
1314 Byte4 c3 = Pointer<Byte4>(buffer[f3])[index[3]];
1315 c.x = Unpack(c0, c1);
1316 c.y = Unpack(c2, c3);
1317
1318 switch(state.textureFormat)
1319 {
// BGRA byte order: the first and third components are swapped relative to
// the RGBA cases below.
Alexis Hetudd152e12018-11-14 13:39:28 -05001320 case VK_FORMAT_B8G8R8A8_UNORM:
Alexis Hetu8b7597e2019-05-06 15:14:21 -04001321 case VK_FORMAT_B8G8R8A8_SRGB:
Nicolas Capens68a82382018-10-02 13:16:55 -04001322 c.z = As<Short4>(UnpackLow(c.x, c.y));
1323 c.x = As<Short4>(UnpackHigh(c.x, c.y));
1324 c.y = c.z;
1325 c.w = c.x;
1326 c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(c.z));
1327 c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(c.y));
1328 c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(c.x));
1329 c.w = UnpackHigh(As<Byte8>(c.w), As<Byte8>(c.w));
1330 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001331 case VK_FORMAT_R8G8B8A8_UNORM:
1332 case VK_FORMAT_R8G8B8A8_SINT:
1333 case VK_FORMAT_R8G8B8A8_SNORM:
1334 case VK_FORMAT_R8G8B8A8_SRGB:
Nicolas Capens68a82382018-10-02 13:16:55 -04001335 c.z = As<Short4>(UnpackHigh(c.x, c.y));
1336 c.x = As<Short4>(UnpackLow(c.x, c.y));
1337 c.y = c.x;
1338 c.w = c.z;
// Each byte is interleaved with itself, so it fills both halves of its
// 16-bit component.
1339 c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(c.x));
1340 c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(c.y));
1341 c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(c.z));
1342 c.w = UnpackHigh(As<Byte8>(c.w), As<Byte8>(c.w));
1343 // Propagate sign bit
Alexis Hetudd152e12018-11-14 13:39:28 -05001344 if(state.textureFormat == VK_FORMAT_R8G8B8A8_SINT)
Nicolas Capens68a82382018-10-02 13:16:55 -04001345 {
1346 c.x >>= 8;
1347 c.y >>= 8;
1348 c.z >>= 8;
1349 c.w >>= 8;
1350 }
1351 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001352 case VK_FORMAT_R8G8B8A8_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001353 c.z = As<Short4>(UnpackHigh(c.x, c.y));
1354 c.x = As<Short4>(UnpackLow(c.x, c.y));
1355 c.y = c.x;
1356 c.w = c.z;
// Unsigned integers are interleaved with zero bytes instead, which
// zero-extends each byte to 16 bits.
1357 c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(Short4(0)));
1358 c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(Short4(0)));
1359 c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(Short4(0)));
1360 c.w = UnpackHigh(As<Byte8>(c.w), As<Byte8>(Short4(0)));
1361 break;
1362 default:
1363 ASSERT(false);
1364 }
1365 }
1366 break;
Nicolas Capens68a82382018-10-02 13:16:55 -04001367 case 2:
// Load one 16-bit (two-byte) texel per lane; the low byte is the first
// component and the high byte the second.
1368 c.x = Insert(c.x, Pointer<Short>(buffer[f0])[index[0]], 0);
1369 c.x = Insert(c.x, Pointer<Short>(buffer[f1])[index[1]], 1);
1370 c.x = Insert(c.x, Pointer<Short>(buffer[f2])[index[2]], 2);
1371 c.x = Insert(c.x, Pointer<Short>(buffer[f3])[index[3]], 3);
1372
1373 switch(state.textureFormat)
1374 {
Alexis Hetudd152e12018-11-14 13:39:28 -05001375 case VK_FORMAT_R8G8_UNORM:
1376 case VK_FORMAT_R8G8_SNORM:
// Replicate each byte into both halves of its 16-bit component.
Nicolas Capens68a82382018-10-02 13:16:55 -04001377 c.y = (c.x & Short4(0xFF00u)) | As<Short4>(As<UShort4>(c.x) >> 8);
1378 c.x = (c.x & Short4(0x00FFu)) | (c.x << 8);
1379 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001380 case VK_FORMAT_R8G8_SINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001381 c.y = c.x >> 8;
1382 c.x = (c.x << 8) >> 8; // Propagate sign bit
1383 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001384 case VK_FORMAT_R8G8_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001385 c.y = As<Short4>(As<UShort4>(c.x) >> 8);
1386 c.x &= Short4(0x00FFu);
1387 break;
1388 default:
1389 ASSERT(false);
1390 }
1391 break;
1392 case 1:
1393 {
// Load one byte per lane and pack the four bytes into a single 32-bit value.
1394 Int c0 = Int(*Pointer<Byte>(buffer[f0] + index[0]));
1395 Int c1 = Int(*Pointer<Byte>(buffer[f1] + index[1]));
1396 Int c2 = Int(*Pointer<Byte>(buffer[f2] + index[2]));
1397 Int c3 = Int(*Pointer<Byte>(buffer[f3] + index[3]));
1398 c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
1399
1400 switch(state.textureFormat)
1401 {
Alexis Hetudd152e12018-11-14 13:39:28 -05001402 case VK_FORMAT_R8_SINT:
1403 case VK_FORMAT_R8_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001404 {
1405 Int zero(0);
1406 c.x = Unpack(As<Byte4>(c0), As<Byte4>(zero));
1407 // Propagate sign bit
Alexis Hetudd152e12018-11-14 13:39:28 -05001408 if(state.textureFormat == VK_FORMAT_R8_SINT)
Nicolas Capens68a82382018-10-02 13:16:55 -04001409 {
1410 c.x = (c.x << 8) >> 8;
1411 }
1412 }
1413 break;
1414 default:
1415 c.x = Unpack(As<Byte4>(c0));
1416 break;
1417 }
1418 }
1419 break;
1420 default:
1421 ASSERT(false);
1422 }
1423 }
1424 else if(has16bitTextureComponents())
1425 {
1426 switch(textureComponentCount())
1427 {
1428 case 4:
// Load a full four-component texel per lane, then transpose from
// texel-major to component-major.
1429 c.x = Pointer<Short4>(buffer[f0])[index[0]];
1430 c.y = Pointer<Short4>(buffer[f1])[index[1]];
1431 c.z = Pointer<Short4>(buffer[f2])[index[2]];
1432 c.w = Pointer<Short4>(buffer[f3])[index[3]];
1433 transpose4x4(c.x, c.y, c.z, c.w);
1434 break;
1435 case 3:
// NOTE(review): indexes texels as Short4 (8 bytes each), same as the
// four-component case — confirm three-component formats are stored with
// that stride.
1436 c.x = Pointer<Short4>(buffer[f0])[index[0]];
1437 c.y = Pointer<Short4>(buffer[f1])[index[1]];
1438 c.z = Pointer<Short4>(buffer[f2])[index[2]];
1439 c.w = Pointer<Short4>(buffer[f3])[index[3]];
1440 transpose4x3(c.x, c.y, c.z, c.w);
1441 break;
1442 case 2:
// Texels are 4 bytes apart; each load reads a Short4 starting at the texel
// and the unpacks below keep only its first two components.
1443 c.x = *Pointer<Short4>(buffer[f0] + 4 * index[0]);
1444 c.x = As<Short4>(UnpackLow(c.x, *Pointer<Short4>(buffer[f1] + 4 * index[1])));
1445 c.z = *Pointer<Short4>(buffer[f2] + 4 * index[2]);
1446 c.z = As<Short4>(UnpackLow(c.z, *Pointer<Short4>(buffer[f3] + 4 * index[3])));
1447 c.y = c.x;
1448 c.x = UnpackLow(As<Int2>(c.x), As<Int2>(c.z));
1449 c.y = UnpackHigh(As<Int2>(c.y), As<Int2>(c.z));
1450 break;
1451 case 1:
1452 c.x = Insert(c.x, Pointer<Short>(buffer[f0])[index[0]], 0);
1453 c.x = Insert(c.x, Pointer<Short>(buffer[f1])[index[1]], 1);
1454 c.x = Insert(c.x, Pointer<Short>(buffer[f2])[index[2]], 2);
1455 c.x = Insert(c.x, Pointer<Short>(buffer[f3])[index[3]], 3);
1456 break;
1457 default:
1458 ASSERT(false);
1459 }
1460 }
Chris Forbesd573e7e2019-05-01 15:05:52 -07001461 else if(state.textureFormat == VK_FORMAT_A2B10G10R10_UNORM_PACK32)
1462 {
// Load one packed 32-bit texel per lane. The first component is taken from
// bits 0-9, the second from bits 10-19, the third from bits 20-29 and the
// fourth from bits 30-31.
1463 Int4 cc;
1464 cc = Insert(cc, Pointer<Int>(buffer[f0])[index[0]], 0);
1465 cc = Insert(cc, Pointer<Int>(buffer[f1])[index[1]], 1);
1466 cc = Insert(cc, Pointer<Int>(buffer[f2])[index[2]], 2);
1467 cc = Insert(cc, Pointer<Int>(buffer[f3])[index[3]], 3);
1468
1469 // shift each 10 bit field left 6, and replicate 6 high bits into bottom 6
1470 c.x = Short4(((cc << 6) & Int4(0xFFC0)) | ((cc >> 4) & Int4(0x3F)));
1471 c.y = Short4(((cc >> 4) & Int4(0xFFC0)) | ((cc >> 14) & Int4(0x3F)));
1472 c.z = Short4(((cc >> 14) & Int4(0xFFC0)) | ((cc >> 24) & Int4(0x3F)));
1473 c.w = Short4(((cc >> 16) & Int4(0xC000)));
1474
1475 // replicate 2 bit alpha component all the way down
1476 c.w |= (c.w >> 8) & Short4(0xc0);
1477 c.w |= (c.w >> 4) & Short4(0x0c0c);
1478 c.w |= (c.w >> 2) & Short4(0x3333);
1479 }
Chris Forbes6ef19362019-05-08 16:07:56 -07001480 else if(state.textureFormat == VK_FORMAT_A2B10G10R10_UINT_PACK32)
1481 {
// Same bit layout as the UNORM variant above, but the fields are kept as
// plain integers in the low bits of each component.
1482 Int4 cc;
1483 cc = Insert(cc, Pointer<Int>(buffer[f0])[index[0]], 0);
1484 cc = Insert(cc, Pointer<Int>(buffer[f1])[index[1]], 1);
1485 cc = Insert(cc, Pointer<Int>(buffer[f2])[index[2]], 2);
1486 cc = Insert(cc, Pointer<Int>(buffer[f3])[index[3]], 3);
1487
1488 c.x = Short4(((cc) & Int4(0x3FF)));
1489 c.y = Short4(((cc >> 10) & Int4(0x3FF)));
1490 c.z = Short4(((cc >> 20) & Int4(0x3FF)));
1491 c.w = Short4(((cc >> 30) & Int4(0x3)));
1492 }
Nicolas Capens68a82382018-10-02 13:16:55 -04001493 else ASSERT(false);
1494
// sRGB formats: convert the colour components to linear; components for
// which isRGBComponent() is false are left as read.
Chris Forbesac3a4a42019-05-01 11:07:50 -07001495 if (state.textureFormat.isSRGBformat())
Nicolas Capens68a82382018-10-02 13:16:55 -04001496 {
Chris Forbesac3a4a42019-05-01 11:07:50 -07001497 for(int i = 0; i < textureComponentCount(); i++)
Nicolas Capens68a82382018-10-02 13:16:55 -04001498 {
Chris Forbesac3a4a42019-05-01 11:07:50 -07001499 if(isRGBComponent(i))
Nicolas Capens68a82382018-10-02 13:16:55 -04001500 {
Chris Forbesac3a4a42019-05-01 11:07:50 -07001501 sRGBtoLinear16_8_16(c[i]);
Nicolas Capens68a82382018-10-02 13:16:55 -04001502 }
1503 }
1504 }
1505
1506 return c;
1507 }
1508
// Reads four texels addressed by 16-bit fixed-point coordinates and returns
// them as 16-bit-per-component vectors.
//
// For non-YUV formats this computes the linear indices and defers to the
// index-based sampleTexel() overload. For YUV formats it reads the luma
// sample from buffer[0] and the two chroma samples from buffer[1] and
// buffer[2], and converts them to RGB in fixed point. The YUV path writes
// c.x, c.y and c.z only; c.w is not assigned.
1509 Vector4s SamplerCore::sampleTexel(Short4 &uuuu, Short4 &vvvv, Short4 &wwww, Vector4f &offset, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function)
1510 {
1511 Vector4s c;
1512
1513 UInt index[4];
1514 computeIndices(index, uuuu, vvvv, wwww, offset, mipmap, function);
1515
1516 if(hasYuvFormat())
1517 {
1518 // Generic YPbPr to RGB transformation
Nicolas Capens60a6a2e2019-05-06 15:09:44 -04001519 // R = Y + 2 * (1 - Kr) * Pr
1520 // G = Y - 2 * Kb * (1 - Kb) / Kg * Pb - 2 * Kr * (1 - Kr) / Kg * Pr
1521 // B = Y + 2 * (1 - Kb) * Pb
Nicolas Capens68a82382018-10-02 13:16:55 -04001522
// Defaults match the only enabled case below.
1523 float Kb = 0.114f;
1524 float Kr = 0.299f;
1525 int studioSwing = 1;
1526
1527 switch(state.textureFormat)
1528 {
Alexis Hetudd152e12018-11-14 13:39:28 -05001529 case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
1530 // VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601
Nicolas Capens68a82382018-10-02 13:16:55 -04001531 Kb = 0.114f;
1532 Kr = 0.299f;
1533 studioSwing = 1;
1534 break;
// NOTE(review): the disabled variants below have no case labels. The one
// labelled YCBCR_2020 reuses the 601 Kb/Kr values with studioSwing = 0;
// BT.2020 specifies Kr = 0.2627 and Kb = 0.0593 — confirm the intended
// coefficients before enabling it.
Alexis Hetudd152e12018-11-14 13:39:28 -05001535 /*
1536 // VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709
Nicolas Capens68a82382018-10-02 13:16:55 -04001537 Kb = 0.0722f;
1538 Kr = 0.2126f;
1539 studioSwing = 1;
1540 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001541 // VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020
Nicolas Capens68a82382018-10-02 13:16:55 -04001542 Kb = 0.114f;
1543 Kr = 0.299f;
1544 studioSwing = 0;
1545 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001546 */
Nicolas Capens68a82382018-10-02 13:16:55 -04001547 default:
1548 ASSERT(false);
1549 }
1550
1551 const float Kg = 1.0f - Kr - Kb;
1552
// Coefficients of the generic transformation above.
1553 const float Rr = 2 * (1 - Kr);
1554 const float Gb = -2 * Kb * (1 - Kb) / Kg;
1555 const float Gr = -2 * Kr * (1 - Kr) / Kg;
1556 const float Bb = 2 * (1 - Kb);
1557
1558 // Scaling and bias for studio-swing range: Y = [16 .. 235], U/V = [16 .. 240]
1559 const float Yy = studioSwing ? 255.0f / (235 - 16) : 1.0f;
1560 const float Uu = studioSwing ? 255.0f / (240 - 16) : 1.0f;
1561 const float Vv = studioSwing ? 255.0f / (240 - 16) : 1.0f;
1562
// Chroma coefficients with the range scaling folded in.
Alexis Hetu8aa74a42018-10-22 14:54:09 -04001563 const float Rv = Vv * Rr;
1564 const float Gu = Uu * Gb;
1565 const float Gv = Vv * Gr;
1566 const float Bu = Uu * Bb;
Nicolas Capens68a82382018-10-02 13:16:55 -04001567
// Constant terms: the luma bias (-16, studio swing only) and the chroma
// bias (-128), normalized by 255.
1568 const float R0 = (studioSwing * -16 * Yy - 128 * Rv) / 255;
1569 const float G0 = (studioSwing * -16 * Yy - 128 * Gu - 128 * Gv) / 255;
1570 const float B0 = (studioSwing * -16 * Yy - 128 * Bu) / 255;
1571
// Luma: one byte per lane from buffer[0], packed into a 32-bit value and
// unpacked to four 16-bit lanes.
1572 Int c0 = Int(buffer[0][index[0]]);
1573 Int c1 = Int(buffer[0][index[1]]);
1574 Int c2 = Int(buffer[0][index[2]]);
1575 Int c3 = Int(buffer[0][index[3]]);
1576 c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
1577 UShort4 Y = As<UShort4>(Unpack(As<Byte4>(c0)));
1578
// Chroma: indices are recomputed against the Mipmap that follows this one
// in memory, then V is read from buffer[1] and U from buffer[2].
// NOTE(review): the only enabled format is a 2-plane one, while this reads
// two separate chroma buffers — confirm the buffer layout set up by the caller.
1579 computeIndices(index, uuuu, vvvv, wwww, offset, mipmap + sizeof(Mipmap), function);
1580 c0 = Int(buffer[1][index[0]]);
1581 c1 = Int(buffer[1][index[1]]);
1582 c2 = Int(buffer[1][index[2]]);
1583 c3 = Int(buffer[1][index[3]]);
1584 c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
1585 UShort4 V = As<UShort4>(Unpack(As<Byte4>(c0)));
1586
1587 c0 = Int(buffer[2][index[0]]);
1588 c1 = Int(buffer[2][index[1]]);
1589 c2 = Int(buffer[2][index[2]]);
1590 c3 = Int(buffer[2][index[3]]);
1591 c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
1592 UShort4 U = As<UShort4>(Unpack(As<Byte4>(c0)));
1593
// Coefficients as unsigned fixed point, scaled by 0x4000. The negative ones
// (Gu, Gv, R0, B0) are negated here and applied by subtraction below.
1594 const UShort4 yY = UShort4(iround(Yy * 0x4000));
1595 const UShort4 rV = UShort4(iround(Rv * 0x4000));
1596 const UShort4 gU = UShort4(iround(-Gu * 0x4000));
1597 const UShort4 gV = UShort4(iround(-Gv * 0x4000));
1598 const UShort4 bU = UShort4(iround(Bu * 0x4000));
1599
1600 const UShort4 r0 = UShort4(iround(-R0 * 0x4000));
1601 const UShort4 g0 = UShort4(iround(G0 * 0x4000));
1602 const UShort4 b0 = UShort4(iround(-B0 * 0x4000));
1603
// Saturating subtraction keeps results that would go negative at zero.
1604 UShort4 y = MulHigh(Y, yY);
1605 UShort4 r = SubSat(y + MulHigh(V, rV), r0);
1606 UShort4 g = SubSat(y + g0, MulHigh(U, gU) + MulHigh(V, gV));
1607 UShort4 b = SubSat(y + MulHigh(U, bU), b0);
1608
// Clamp to 14 bits, then shift up to the full 16-bit range.
1609 c.x = Min(r, UShort4(0x3FFF)) << 2;
1610 c.y = Min(g, UShort4(0x3FFF)) << 2;
1611 c.z = Min(b, UShort4(0x3FFF)) << 2;
1612 }
1613 else
1614 {
1615 return sampleTexel(index, buffer);
1616 }
1617
1618 return c;
1619 }
1620
1621 Vector4f SamplerCore::sampleTexel(Int4 &uuuu, Int4 &vvvv, Int4 &wwww, Float4 &z, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function)
1622 {
Nicolas Capens60a6a2e2019-05-06 15:09:44 -04001623 Int4 valid;
1624
1625 if(borderModeActive())
1626 {
1627 // Valid texels have positive coordinates.
1628 Int4 negative = Int4(0);
1629 if(state.addressingModeU == ADDRESSING_BORDER) negative |= uuuu;
1630 if(state.addressingModeV == ADDRESSING_BORDER) negative |= vvvv;
1631 if(state.addressingModeW == ADDRESSING_BORDER) negative |= wwww;
1632 valid = CmpNLT(negative, Int4(0));
1633 }
Nicolas Capens68a82382018-10-02 13:16:55 -04001634
1635 UInt index[4];
Chris Forbes8d7f2332019-05-02 10:58:11 -07001636 UInt4 t0, t1, t2, t3;
Nicolas Capens60a6a2e2019-05-06 15:09:44 -04001637 computeIndices(index, uuuu, vvvv, wwww, valid, mipmap, function);
1638
1639 Vector4f c;
Nicolas Capens68a82382018-10-02 13:16:55 -04001640
1641 if(hasFloatTexture() || has32bitIntegerTextureComponents())
1642 {
1643 int f0 = state.textureType == TEXTURE_CUBE ? 0 : 0;
1644 int f1 = state.textureType == TEXTURE_CUBE ? 1 : 0;
1645 int f2 = state.textureType == TEXTURE_CUBE ? 2 : 0;
1646 int f3 = state.textureType == TEXTURE_CUBE ? 3 : 0;
1647
Chris Forbes8d7f2332019-05-02 10:58:11 -07001648 switch (state.textureFormat)
Nicolas Capens68a82382018-10-02 13:16:55 -04001649 {
Chris Forbes8d7f2332019-05-02 10:58:11 -07001650 case VK_FORMAT_R16_SFLOAT:
1651 t0 = Int4(*Pointer<UShort4>(buffer[f0] + index[0] * 2));
1652 t1 = Int4(*Pointer<UShort4>(buffer[f1] + index[1] * 2));
1653 t2 = Int4(*Pointer<UShort4>(buffer[f2] + index[2] * 2));
1654 t3 = Int4(*Pointer<UShort4>(buffer[f3] + index[3] * 2));
Chris Forbesd3546952019-04-30 19:32:19 -07001655
Chris Forbes8d7f2332019-05-02 10:58:11 -07001656 c.x.x = Extract(As<Float4>(halfToFloatBits(t0)), 0);
1657 c.x.y = Extract(As<Float4>(halfToFloatBits(t1)), 0);
1658 c.x.z = Extract(As<Float4>(halfToFloatBits(t2)), 0);
1659 c.x.w = Extract(As<Float4>(halfToFloatBits(t3)), 0);
1660 break;
1661 case VK_FORMAT_R16G16_SFLOAT:
1662 t0 = Int4(*Pointer<UShort4>(buffer[f0] + index[0] * 4));
1663 t1 = Int4(*Pointer<UShort4>(buffer[f1] + index[1] * 4));
1664 t2 = Int4(*Pointer<UShort4>(buffer[f2] + index[2] * 4));
1665 t3 = Int4(*Pointer<UShort4>(buffer[f3] + index[3] * 4));
Chris Forbesd3546952019-04-30 19:32:19 -07001666
Chris Forbes8d7f2332019-05-02 10:58:11 -07001667 // FIXME: shuffles
1668 c.x = As<Float4>(halfToFloatBits(t0));
1669 c.y = As<Float4>(halfToFloatBits(t1));
1670 c.z = As<Float4>(halfToFloatBits(t2));
1671 c.w = As<Float4>(halfToFloatBits(t3));
1672 transpose4x4(c.x, c.y, c.z, c.w);
1673 break;
1674 case VK_FORMAT_R16G16B16A16_SFLOAT:
1675 t0 = Int4(*Pointer<UShort4>(buffer[f0] + index[0] * 8));
1676 t1 = Int4(*Pointer<UShort4>(buffer[f1] + index[1] * 8));
1677 t2 = Int4(*Pointer<UShort4>(buffer[f2] + index[2] * 8));
1678 t3 = Int4(*Pointer<UShort4>(buffer[f3] + index[3] * 8));
Chris Forbesd3546952019-04-30 19:32:19 -07001679
Chris Forbes8d7f2332019-05-02 10:58:11 -07001680 c.x = As<Float4>(halfToFloatBits(t0));
1681 c.y = As<Float4>(halfToFloatBits(t1));
1682 c.z = As<Float4>(halfToFloatBits(t2));
1683 c.w = As<Float4>(halfToFloatBits(t3));
1684 transpose4x4(c.x, c.y, c.z, c.w);
1685 break;
1686 case VK_FORMAT_R32_SFLOAT:
1687 case VK_FORMAT_R32_SINT:
1688 case VK_FORMAT_R32_UINT:
Chris Forbes7d091432019-05-02 11:52:08 -07001689 case VK_FORMAT_D32_SFLOAT:
Chris Forbes8d7f2332019-05-02 10:58:11 -07001690 // FIXME: Optimal shuffling?
1691 c.x.x = *Pointer<Float>(buffer[f0] + index[0] * 4);
1692 c.x.y = *Pointer<Float>(buffer[f1] + index[1] * 4);
1693 c.x.z = *Pointer<Float>(buffer[f2] + index[2] * 4);
1694 c.x.w = *Pointer<Float>(buffer[f3] + index[3] * 4);
1695 break;
1696 case VK_FORMAT_R32G32_SFLOAT:
1697 case VK_FORMAT_R32G32_SINT:
1698 case VK_FORMAT_R32G32_UINT:
1699 // FIXME: Optimal shuffling?
1700 c.x.xy = *Pointer<Float4>(buffer[f0] + index[0] * 8);
1701 c.x.zw = *Pointer<Float4>(buffer[f1] + index[1] * 8 - 8);
1702 c.z.xy = *Pointer<Float4>(buffer[f2] + index[2] * 8);
1703 c.z.zw = *Pointer<Float4>(buffer[f3] + index[3] * 8 - 8);
1704 c.y = c.x;
1705 c.x = Float4(c.x.xz, c.z.xz);
1706 c.y = Float4(c.y.yw, c.z.yw);
1707 break;
1708 case VK_FORMAT_R32G32B32_SFLOAT:
1709 case VK_FORMAT_R32G32B32_SINT:
1710 case VK_FORMAT_R32G32B32_UINT:
1711 c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
1712 c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
1713 c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
1714 c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
1715 transpose4x3(c.x, c.y, c.z, c.w);
1716 break;
1717 case VK_FORMAT_R32G32B32A32_SFLOAT:
1718 case VK_FORMAT_R32G32B32A32_SINT:
1719 case VK_FORMAT_R32G32B32A32_UINT:
1720 c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
1721 c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
1722 c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
1723 c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
1724 transpose4x4(c.x, c.y, c.z, c.w);
1725 break;
Chris Forbes1f618582019-05-02 11:22:56 -07001726 case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
1727 {
1728 Float4 t; // TODO: add Insert(UInt4, RValue<UInt>)
1729 t.x = *Pointer<Float>(buffer[f0] + index[0] * 4);
1730 t.y = *Pointer<Float>(buffer[f1] + index[1] * 4);
1731 t.z = *Pointer<Float>(buffer[f2] + index[2] * 4);
1732 t.w = *Pointer<Float>(buffer[f3] + index[3] * 4);
1733 t0 = As<UInt4>(t);
1734 c.w = Float4(UInt4(1) << ((t0 >> 27) & UInt4(0x1F))) * Float4(1.0f / (1 << 24));
1735 c.x = Float4((t0) & UInt4(0x1FF)) * c.w;
1736 c.y = Float4((t0 >> 9) & UInt4(0x1FF)) * c.w;
1737 c.z = Float4((t0 >> 18) & UInt4(0x1FF)) * c.w;
1738 break;
1739 }
Chris Forbesc57ec6f2019-05-02 11:44:41 -07001740 case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
1741 {
1742 Float4 t; // TODO: add Insert(UInt4, RValue<UInt>)
1743 t.x = *Pointer<Float>(buffer[f0] + index[0] * 4);
1744 t.y = *Pointer<Float>(buffer[f1] + index[1] * 4);
1745 t.z = *Pointer<Float>(buffer[f2] + index[2] * 4);
1746 t.w = *Pointer<Float>(buffer[f3] + index[3] * 4);
1747 t0 = As<UInt4>(t);
1748 c.x = As<Float4>(halfToFloatBits((t0 << 4) & UInt4(0x7FF0)));
1749 c.y = As<Float4>(halfToFloatBits((t0 >> 7) & UInt4(0x7FF0)));
1750 c.z = As<Float4>(halfToFloatBits((t0 >> 17) & UInt4(0x7FE0)));
1751 break;
1752 }
Chris Forbes8d7f2332019-05-02 10:58:11 -07001753 default:
1754 UNIMPLEMENTED("Format %d", VkFormat(state.textureFormat));
Nicolas Capens68a82382018-10-02 13:16:55 -04001755 }
Nicolas Capens68a82382018-10-02 13:16:55 -04001756 }
1757 else
1758 {
1759 ASSERT(!hasYuvFormat());
1760
1761 Vector4s cs = sampleTexel(index, buffer);
1762
Alexis Hetu696926d2019-03-18 11:30:01 -04001763 bool isInteger = state.textureFormat.isNonNormalizedInteger();
Nicolas Capens68a82382018-10-02 13:16:55 -04001764 int componentCount = textureComponentCount();
1765 for(int n = 0; n < componentCount; n++)
1766 {
1767 if(hasUnsignedTextureComponent(n))
1768 {
1769 if(isInteger)
1770 {
1771 c[n] = As<Float4>(Int4(As<UShort4>(cs[n])));
1772 }
1773 else
1774 {
1775 c[n] = Float4(As<UShort4>(cs[n]));
1776 }
1777 }
1778 else
1779 {
1780 if(isInteger)
1781 {
1782 c[n] = As<Float4>(Int4(cs[n]));
1783 }
1784 else
1785 {
1786 c[n] = Float4(cs[n]);
1787 }
1788 }
1789 }
1790 }
1791
Chris Forbesc71c17f2019-05-04 10:01:04 -07001792 if(state.compareEnable)
Chris Forbes71b58b12019-05-04 09:57:09 -07001793 {
1794 Float4 ref = z;
1795
1796 if(!hasFloatTexture())
1797 {
Chris Forbesc71c17f2019-05-04 10:01:04 -07001798 // D16_UNORM: clamp reference, normalize texel value
Chris Forbes71b58b12019-05-04 09:57:09 -07001799 ref = Min(Max(ref, Float4(0.0f)), Float4(1.0f));
Chris Forbesc71c17f2019-05-04 10:01:04 -07001800 c.x = c.x * Float4(1.0f / 0xFFFF);
Chris Forbes71b58b12019-05-04 09:57:09 -07001801 }
1802
1803 Int4 boolean;
1804
Chris Forbesc71c17f2019-05-04 10:01:04 -07001805 switch(state.compareOp)
Chris Forbes71b58b12019-05-04 09:57:09 -07001806 {
Chris Forbesc71c17f2019-05-04 10:01:04 -07001807 case VK_COMPARE_OP_LESS_OR_EQUAL: boolean = CmpLE(ref, c.x); break;
1808 case VK_COMPARE_OP_GREATER_OR_EQUAL: boolean = CmpNLT(ref, c.x); break;
1809 case VK_COMPARE_OP_LESS: boolean = CmpLT(ref, c.x); break;
1810 case VK_COMPARE_OP_GREATER: boolean = CmpNLE(ref, c.x); break;
1811 case VK_COMPARE_OP_EQUAL: boolean = CmpEQ(ref, c.x); break;
1812 case VK_COMPARE_OP_NOT_EQUAL: boolean = CmpNEQ(ref, c.x); break;
1813 case VK_COMPARE_OP_ALWAYS: boolean = Int4(-1); break;
1814 case VK_COMPARE_OP_NEVER: boolean = Int4(0); break;
Chris Forbes71b58b12019-05-04 09:57:09 -07001815 default: ASSERT(false);
1816 }
1817
1818 c.x = As<Float4>(boolean & As<Int4>(Float4(1.0f)));
1819 c.y = Float4(0.0f);
1820 c.z = Float4(0.0f);
1821 c.w = Float4(1.0f);
1822 }
1823
Nicolas Capens60a6a2e2019-05-06 15:09:44 -04001824 if(borderModeActive())
1825 {
1826 c = replaceBorderTexel(c, valid);
1827 }
1828
Nicolas Capens68a82382018-10-02 13:16:55 -04001829 return c;
1830 }
1831
Nicolas Capens60a6a2e2019-05-06 15:09:44 -04001832 Vector4f SamplerCore::replaceBorderTexel(const Vector4f &c, Int4 valid)
1833 {
1834 Int4 borderRGB;
1835 Int4 borderA;
1836
1837 bool scaled = !hasFloatTexture() && !hasUnnormalizedIntegerTexture() && !state.compareEnable;
1838 bool sign = !hasUnsignedTextureComponent(0);
1839 Int4 float_one = scaled ? As<Int4>(Float4(sign ? 0x7FFF : 0xFFFF)) : As<Int4>(Float4(1.0f));
1840
1841 switch(state.border)
1842 {
1843 case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
1844 case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
1845 borderRGB = Int4(0);
1846 borderA = Int4(0);
1847 break;
1848 case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
1849 borderRGB = Int4(0);
1850 borderA = float_one;
1851 break;
1852 case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
1853 borderRGB = Int4(0);
1854 borderA = Int4(1);
1855 break;
1856 case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
1857 borderRGB = float_one;
1858 borderA = float_one;
1859 break;
1860 case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
1861 borderRGB = Int4(1);
1862 borderA = Int4(1);
1863 break;
1864 default:
1865 UNIMPLEMENTED("sint/uint/sfloat border: %u", state.border);
1866 }
1867
1868 Vector4f out;
1869 out.x = As<Float4>((valid & As<Int4>(c.x)) | (~valid & borderRGB));
1870 out.y = As<Float4>((valid & As<Int4>(c.y)) | (~valid & borderRGB));
1871 out.z = As<Float4>((valid & As<Int4>(c.z)) | (~valid & borderRGB));
1872 out.w = As<Float4>((valid & As<Int4>(c.w)) | (~valid & borderA));
1873
1874 return out;
1875 }
1876
Nicolas Capens68a82382018-10-02 13:16:55 -04001877 void SamplerCore::selectMipmap(Pointer<Byte> &texture, Pointer<Byte> buffer[4], Pointer<Byte> &mipmap, Float &lod, Int face[4], bool secondLOD)
1878 {
1879 if(state.mipmapFilter == MIPMAP_NONE)
1880 {
1881 mipmap = texture + OFFSET(Texture,mipmap[0]);
1882 }
1883 else
1884 {
1885 Int ilod;
1886
1887 if(state.mipmapFilter == MIPMAP_POINT)
1888 {
Nicolas Capens9e735102019-04-18 15:03:06 -04001889 // TODO: Preferred formula is ceil(lod + 0.5) - 1
Nicolas Capens68a82382018-10-02 13:16:55 -04001890 ilod = RoundInt(lod);
1891 }
1892 else // MIPMAP_LINEAR
1893 {
1894 ilod = Int(lod);
1895 }
1896
1897 mipmap = texture + OFFSET(Texture,mipmap) + ilod * sizeof(Mipmap) + secondLOD * sizeof(Mipmap);
1898 }
1899
1900 if(state.textureType != TEXTURE_CUBE)
1901 {
Nicolas Capens60a6a2e2019-05-06 15:09:44 -04001902 buffer[0] = *Pointer<Pointer<Byte>>(mipmap + OFFSET(Mipmap,buffer[0]));
Nicolas Capens68a82382018-10-02 13:16:55 -04001903
1904 if(hasYuvFormat())
1905 {
Nicolas Capens60a6a2e2019-05-06 15:09:44 -04001906 buffer[1] = *Pointer<Pointer<Byte>>(mipmap + OFFSET(Mipmap,buffer[1]));
1907 buffer[2] = *Pointer<Pointer<Byte>>(mipmap + OFFSET(Mipmap,buffer[2]));
Nicolas Capens68a82382018-10-02 13:16:55 -04001908 }
1909 }
1910 else
1911 {
1912 for(int i = 0; i < 4; i++)
1913 {
Nicolas Capens60a6a2e2019-05-06 15:09:44 -04001914 buffer[i] = *Pointer<Pointer<Byte>>(mipmap + OFFSET(Mipmap,buffer) + face[i] * sizeof(void*));
Nicolas Capens68a82382018-10-02 13:16:55 -04001915 }
1916 }
1917 }
1918
1919 Int4 SamplerCore::computeFilterOffset(Float &lod)
1920 {
Nicolas Capens68a82382018-10-02 13:16:55 -04001921 if(state.textureFilter == FILTER_POINT)
1922 {
Nicolas Capens60a6a2e2019-05-06 15:09:44 -04001923 return Int4(0);
Nicolas Capens68a82382018-10-02 13:16:55 -04001924 }
1925 else if(state.textureFilter == FILTER_MIN_LINEAR_MAG_POINT)
1926 {
Nicolas Capens60a6a2e2019-05-06 15:09:44 -04001927 return CmpNLE(Float4(lod), Float4(0.0f));
Nicolas Capens68a82382018-10-02 13:16:55 -04001928 }
1929 else if(state.textureFilter == FILTER_MIN_POINT_MAG_LINEAR)
1930 {
Nicolas Capens60a6a2e2019-05-06 15:09:44 -04001931 return CmpLE(Float4(lod), Float4(0.0f));
Nicolas Capens68a82382018-10-02 13:16:55 -04001932 }
1933
Nicolas Capens60a6a2e2019-05-06 15:09:44 -04001934 return Int4(~0);
Nicolas Capens68a82382018-10-02 13:16:55 -04001935 }
1936
1937 Short4 SamplerCore::address(Float4 &uw, AddressingMode addressingMode, Pointer<Byte> &mipmap)
1938 {
Nicolas Capense2535df2019-05-06 10:37:50 -04001939 if(addressingMode == ADDRESSING_LAYER)
Nicolas Capens68a82382018-10-02 13:16:55 -04001940 {
1941 return Min(Max(Short4(RoundInt(uw)), Short4(0)), *Pointer<Short4>(mipmap + OFFSET(Mipmap, depth)) - Short4(1));
1942 }
1943 else if(addressingMode == ADDRESSING_CLAMP || addressingMode == ADDRESSING_BORDER)
1944 {
1945 Float4 clamp = Min(Max(uw, Float4(0.0f)), Float4(65535.0f / 65536.0f));
1946
1947 return Short4(Int4(clamp * Float4(1 << 16)));
1948 }
1949 else if(addressingMode == ADDRESSING_MIRROR)
1950 {
1951 Int4 convert = Int4(uw * Float4(1 << 16));
1952 Int4 mirror = (convert << 15) >> 31;
1953
1954 convert ^= mirror;
1955
1956 return Short4(convert);
1957 }
1958 else if(addressingMode == ADDRESSING_MIRRORONCE)
1959 {
1960 // Absolute value
1961 Int4 convert = Int4(Abs(uw * Float4(1 << 16)));
1962
1963 // Clamp
1964 convert -= Int4(0x00008000, 0x00008000, 0x00008000, 0x00008000);
1965 convert = As<Int4>(PackSigned(convert, convert));
1966
1967 return As<Short4>(Int2(convert)) + Short4(0x8000u);
1968 }
1969 else // Wrap
1970 {
1971 return Short4(Int4(uw * Float4(1 << 16)));
1972 }
1973 }
1974
1975 void SamplerCore::address(Float4 &uvw, Int4 &xyz0, Int4 &xyz1, Float4 &f, Pointer<Byte> &mipmap, Float4 &texOffset, Int4 &filter, int whd, AddressingMode addressingMode, SamplerFunction function)
1976 {
Nicolas Capens68a82382018-10-02 13:16:55 -04001977 Int4 dim = Int4(*Pointer<Short4>(mipmap + whd, 16));
1978 Int4 maxXYZ = dim - Int4(1);
1979
1980 if(function == Fetch)
1981 {
1982 xyz0 = Min(Max(((function.option == Offset) && (addressingMode != ADDRESSING_LAYER)) ? As<Int4>(uvw) + As<Int4>(texOffset) : As<Int4>(uvw), Int4(0)), maxXYZ);
1983 }
Nicolas Capense2535df2019-05-06 10:37:50 -04001984 else if(addressingMode == ADDRESSING_LAYER) // Note: Offset does not apply to array layers
Nicolas Capens68a82382018-10-02 13:16:55 -04001985 {
1986 xyz0 = Min(Max(RoundInt(uvw), Int4(0)), maxXYZ);
1987 }
1988 else
1989 {
1990 const int halfBits = 0x3EFFFFFF; // Value just under 0.5f
1991 const int oneBits = 0x3F7FFFFF; // Value just under 1.0f
1992 const int twoBits = 0x3FFFFFFF; // Value just under 2.0f
1993
1994 bool pointFilter = state.textureFilter == FILTER_POINT ||
1995 state.textureFilter == FILTER_MIN_POINT_MAG_LINEAR ||
1996 state.textureFilter == FILTER_MIN_LINEAR_MAG_POINT;
1997
1998 Float4 coord = uvw;
1999
2000 if(state.textureType == TEXTURE_RECTANGLE)
2001 {
2002 // According to https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_rectangle.txt
2003 // "CLAMP_TO_EDGE causes the s coordinate to be clamped to the range[0.5, wt - 0.5].
2004 // CLAMP_TO_EDGE causes the t coordinate to be clamped to the range[0.5, ht - 0.5]."
2005 // Unless SwiftShader implements support for ADDRESSING_BORDER, other modes should be equivalent
2006 // to CLAMP_TO_EDGE. Rectangle textures have no support for any MIRROR or REPEAT modes.
2007 coord = Min(Max(coord, Float4(0.5f)), Float4(dim) - Float4(0.5f));
2008 }
2009 else
2010 {
2011 switch(addressingMode)
2012 {
2013 case ADDRESSING_CLAMP:
Nicolas Capens68a82382018-10-02 13:16:55 -04002014 case ADDRESSING_SEAMLESS:
2015 // Linear filtering of cube doesn't require clamping because the coordinates
2016 // are already in [0, 1] range and numerical imprecision is tolerated.
2017 if(addressingMode != ADDRESSING_SEAMLESS || pointFilter)
2018 {
2019 Float4 one = As<Float4>(Int4(oneBits));
2020 coord = Min(Max(coord, Float4(0.0f)), one);
2021 }
2022 break;
2023 case ADDRESSING_MIRROR:
Nicolas Capens60a6a2e2019-05-06 15:09:44 -04002024 {
2025 Float4 half = As<Float4>(Int4(halfBits));
2026 Float4 one = As<Float4>(Int4(oneBits));
2027 Float4 two = As<Float4>(Int4(twoBits));
2028 coord = one - Abs(two * Frac(coord * half) - one);
2029 }
2030 break;
Nicolas Capens68a82382018-10-02 13:16:55 -04002031 case ADDRESSING_MIRRORONCE:
Nicolas Capens60a6a2e2019-05-06 15:09:44 -04002032 {
2033 Float4 half = As<Float4>(Int4(halfBits));
2034 Float4 one = As<Float4>(Int4(oneBits));
2035 Float4 two = As<Float4>(Int4(twoBits));
2036 coord = one - Abs(two * Frac(Min(Max(coord, -one), two) * half) - one);
2037 }
2038 break;
2039 case ADDRESSING_BORDER:
2040 // Don't map to a valid range here.
2041 break;
Nicolas Capens68a82382018-10-02 13:16:55 -04002042 default: // Wrap
2043 coord = Frac(coord);
2044 break;
2045 }
2046
2047 coord = coord * Float4(dim);
2048 }
2049
2050 if(state.textureFilter == FILTER_POINT ||
2051 state.textureFilter == FILTER_GATHER)
2052 {
Nicolas Capens60a6a2e2019-05-06 15:09:44 -04002053 if(addressingMode == ADDRESSING_BORDER)
2054 {
2055 xyz0 = Int4(Floor(coord));
2056 }
2057 else // Can't have negative coordinates, so floor() is redundant when casting to int.
2058 {
2059 xyz0 = Int4(coord);
2060 }
Nicolas Capens68a82382018-10-02 13:16:55 -04002061 }
2062 else
2063 {
2064 if(state.textureFilter == FILTER_MIN_POINT_MAG_LINEAR ||
2065 state.textureFilter == FILTER_MIN_LINEAR_MAG_POINT)
2066 {
2067 coord -= As<Float4>(As<Int4>(Float4(0.5f)) & filter);
2068 }
2069 else
2070 {
2071 coord -= Float4(0.5f);
2072 }
2073
2074 Float4 floor = Floor(coord);
2075 xyz0 = Int4(floor);
2076 f = coord - floor;
2077 }
2078
2079 if(function.option == Offset)
2080 {
2081 xyz0 += As<Int4>(texOffset);
2082 }
2083
2084 if(addressingMode == ADDRESSING_SEAMLESS)
2085 {
2086 xyz0 += Int4(1);
2087 }
2088
2089 xyz1 = xyz0 - filter; // Increment
2090
Nicolas Capens60a6a2e2019-05-06 15:09:44 -04002091 if(addressingMode == ADDRESSING_BORDER)
2092 {
2093 // Replace the coordinates with -1 if they're out of range.
2094 Int4 border0 = CmpLT(xyz0, Int4(0)) | CmpNLT(xyz0, dim);
2095 Int4 border1 = CmpLT(xyz1, Int4(0)) | CmpNLT(xyz1, dim);
2096 xyz0 |= border0;
2097 xyz1 |= border1;
2098 }
2099 else if(function.option == Offset)
Nicolas Capens68a82382018-10-02 13:16:55 -04002100 {
2101 switch(addressingMode)
2102 {
2103 case ADDRESSING_SEAMLESS:
2104 ASSERT(false); // Cube sampling doesn't support offset.
2105 case ADDRESSING_MIRROR:
2106 case ADDRESSING_MIRRORONCE:
Nicolas Capens60a6a2e2019-05-06 15:09:44 -04002107 // FIXME: Implement ADDRESSING_MIRROR and ADDRESSING_MIRRORONCE.
Nicolas Capens68a82382018-10-02 13:16:55 -04002108 // Fall through to Clamp.
2109 case ADDRESSING_CLAMP:
2110 xyz0 = Min(Max(xyz0, Int4(0)), maxXYZ);
2111 xyz1 = Min(Max(xyz1, Int4(0)), maxXYZ);
2112 break;
2113 default: // Wrap
2114 xyz0 = (xyz0 + dim * Int4(-MIN_PROGRAM_TEXEL_OFFSET)) % dim;
2115 xyz1 = (xyz1 + dim * Int4(-MIN_PROGRAM_TEXEL_OFFSET)) % dim;
2116 break;
2117 }
2118 }
2119 else if(state.textureFilter != FILTER_POINT)
2120 {
2121 switch(addressingMode)
2122 {
2123 case ADDRESSING_SEAMLESS:
2124 break;
2125 case ADDRESSING_MIRROR:
2126 case ADDRESSING_MIRRORONCE:
Nicolas Capens68a82382018-10-02 13:16:55 -04002127 case ADDRESSING_CLAMP:
2128 xyz0 = Max(xyz0, Int4(0));
2129 xyz1 = Min(xyz1, maxXYZ);
2130 break;
2131 default: // Wrap
2132 {
2133 Int4 under = CmpLT(xyz0, Int4(0));
2134 xyz0 = (under & maxXYZ) | (~under & xyz0); // xyz < 0 ? dim - 1 : xyz // FIXME: IfThenElse()
2135
2136 Int4 nover = CmpLT(xyz1, dim);
2137 xyz1 = nover & xyz1; // xyz >= dim ? 0 : xyz
2138 }
2139 break;
2140 }
2141 }
2142 }
2143 }
2144
Nicolas Capens68a82382018-10-02 13:16:55 -04002145 void SamplerCore::convertSigned15(Float4 &cf, Short4 &cs)
2146 {
2147 cf = Float4(cs) * Float4(1.0f / 0x7FFF);
2148 }
2149
2150 void SamplerCore::convertUnsigned16(Float4 &cf, Short4 &cs)
2151 {
2152 cf = Float4(As<UShort4>(cs)) * Float4(1.0f / 0xFFFF);
2153 }
2154
2155 void SamplerCore::sRGBtoLinear16_8_16(Short4 &c)
2156 {
2157 c = As<UShort4>(c) >> 8;
2158
2159 Pointer<Byte> LUT = Pointer<Byte>(constants + OFFSET(Constants,sRGBtoLinear8_16));
2160
2161 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 0))), 0);
2162 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 1))), 1);
2163 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 2))), 2);
2164 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 3))), 3);
2165 }
2166
Nicolas Capens68a82382018-10-02 13:16:55 -04002167 bool SamplerCore::hasFloatTexture() const
2168 {
Alexis Hetu696926d2019-03-18 11:30:01 -04002169 return state.textureFormat.isFloatFormat();
Nicolas Capens68a82382018-10-02 13:16:55 -04002170 }
2171
2172 bool SamplerCore::hasUnnormalizedIntegerTexture() const
2173 {
Alexis Hetu696926d2019-03-18 11:30:01 -04002174 return state.textureFormat.isNonNormalizedInteger();
Nicolas Capens68a82382018-10-02 13:16:55 -04002175 }
2176
2177 bool SamplerCore::hasUnsignedTextureComponent(int component) const
2178 {
Alexis Hetu696926d2019-03-18 11:30:01 -04002179 return state.textureFormat.isUnsignedComponent(component);
Nicolas Capens68a82382018-10-02 13:16:55 -04002180 }
2181
2182 int SamplerCore::textureComponentCount() const
2183 {
Alexis Hetu696926d2019-03-18 11:30:01 -04002184 return state.textureFormat.componentCount();
Nicolas Capens68a82382018-10-02 13:16:55 -04002185 }
2186
2187 bool SamplerCore::hasThirdCoordinate() const
2188 {
Nicolas Capense2535df2019-05-06 10:37:50 -04002189 return (state.textureType == TEXTURE_3D) ||
2190 (state.textureType == TEXTURE_2D_ARRAY) ||
2191 (state.textureType == TEXTURE_1D_ARRAY); // Treated as 2D texture with second coordinate 0.
Nicolas Capens68a82382018-10-02 13:16:55 -04002192 }
2193
2194 bool SamplerCore::has16bitTextureFormat() const
2195 {
Alexis Hetu696926d2019-03-18 11:30:01 -04002196 return state.textureFormat.has16bitTextureFormat();
Nicolas Capens68a82382018-10-02 13:16:55 -04002197 }
2198
2199 bool SamplerCore::has8bitTextureComponents() const
2200 {
Alexis Hetu696926d2019-03-18 11:30:01 -04002201 return state.textureFormat.has8bitTextureComponents();
Nicolas Capens68a82382018-10-02 13:16:55 -04002202 }
2203
2204 bool SamplerCore::has16bitTextureComponents() const
2205 {
Alexis Hetu696926d2019-03-18 11:30:01 -04002206 return state.textureFormat.has16bitTextureComponents();
Nicolas Capens68a82382018-10-02 13:16:55 -04002207 }
2208
2209 bool SamplerCore::has32bitIntegerTextureComponents() const
2210 {
Alexis Hetu696926d2019-03-18 11:30:01 -04002211 return state.textureFormat.has32bitIntegerTextureComponents();
Nicolas Capens68a82382018-10-02 13:16:55 -04002212 }
2213
2214 bool SamplerCore::hasYuvFormat() const
2215 {
Alexis Hetu696926d2019-03-18 11:30:01 -04002216 return state.textureFormat.hasYuvFormat();
Nicolas Capens68a82382018-10-02 13:16:55 -04002217 }
2218
2219 bool SamplerCore::isRGBComponent(int component) const
2220 {
Alexis Hetu696926d2019-03-18 11:30:01 -04002221 return state.textureFormat.isRGBComponent(component);
Nicolas Capens68a82382018-10-02 13:16:55 -04002222 }
Nicolas Capens60a6a2e2019-05-06 15:09:44 -04002223
2224 bool SamplerCore::borderModeActive() const
2225 {
2226 return state.addressingModeU == ADDRESSING_BORDER ||
2227 state.addressingModeV == ADDRESSING_BORDER ||
2228 state.addressingModeW == ADDRESSING_BORDER;
2229 }
Nicolas Capens68a82382018-10-02 13:16:55 -04002230}