blob: e57cfc72dd15e9064312624c4a3de787ea478be5 [file] [log] [blame]
// Copyright 2018 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
Chris Forbesaf4ed532018-12-06 18:33:27 -080015#include "SpirvShader.hpp"
Nicolas Capens7d867272019-04-08 22:51:08 -040016#include "SamplerCore.hpp"
Ben Claytonecfeede2019-05-08 08:51:01 +010017
18#include "Reactor/Coroutine.hpp"
Chris Forbesaf4ed532018-12-06 18:33:27 -080019#include "System/Math.hpp"
Ben Claytonefec1b92019-03-05 17:38:16 +000020#include "Vulkan/VkBuffer.hpp"
Chris Forbes58228822019-04-17 12:51:29 -070021#include "Vulkan/VkBufferView.hpp"
Chris Forbesebe5f7f2019-01-16 10:38:34 -080022#include "Vulkan/VkDebug.hpp"
Ben Clayton225a1302019-04-02 12:28:22 +010023#include "Vulkan/VkDescriptorSet.hpp"
Ben Clayton76e9bc02019-02-26 15:02:18 +000024#include "Vulkan/VkPipelineLayout.hpp"
Nicolas Capens09591b82019-04-08 22:51:08 -040025#include "Vulkan/VkDescriptorSetLayout.hpp"
Chris Forbes24466042019-04-22 10:54:23 -070026#include "Vulkan/VkRenderPass.hpp"
Chris Forbesaf4ed532018-12-06 18:33:27 -080027#include "Device/Config.hpp"
28
Nicolas Capens82eb22e2019-04-10 01:15:43 -040029#include <spirv/unified1/spirv.hpp>
30#include <spirv/unified1/GLSL.std.450.h>
31
Ben Claytone747b3c2019-03-21 19:35:15 +000032namespace
33{
// Pi, rounded to single precision from the literal below.
// NOTE(review): not referenced in the portion of the file reviewed here —
// presumably consumed by code further down; confirm before removing.
constexpr float PI = 3.141592653589793f;
35
Ben Claytone747b3c2019-03-21 19:35:15 +000036 rr::RValue<rr::Bool> AnyTrue(rr::RValue<sw::SIMD::Int> const &ints)
37 {
38 return rr::SignMask(ints) != 0;
39 }
40
41 rr::RValue<rr::Bool> AnyFalse(rr::RValue<sw::SIMD::Int> const &ints)
42 {
43 return rr::SignMask(~ints) != 0;
44 }
Ben Claytond86db952019-04-08 13:43:11 -040045
46 // Returns 1 << bits.
47 // If the resulting bit overflows a 32 bit integer, 0 is returned.
48 rr::RValue<sw::SIMD::UInt> NthBit32(rr::RValue<sw::SIMD::UInt> const &bits)
49 {
50 return ((sw::SIMD::UInt(1) << bits) & rr::CmpLT(bits, sw::SIMD::UInt(32)));
51 }
52
53 // Returns bitCount number of of 1's starting from the LSB.
54 rr::RValue<sw::SIMD::UInt> Bitmask32(rr::RValue<sw::SIMD::UInt> const &bitCount)
55 {
56 return NthBit32(bitCount) - sw::SIMD::UInt(1);
57 }
Ben Clayton6caf8212019-04-09 11:28:39 -040058
59 // Performs a fused-multiply add, returning a * b + c.
60 rr::RValue<sw::SIMD::Float> FMA(
61 rr::RValue<sw::SIMD::Float> const &a,
62 rr::RValue<sw::SIMD::Float> const &b,
63 rr::RValue<sw::SIMD::Float> const &c)
64 {
65 return a * b + c;
66 }
Ben Clayton20f6ba82019-04-09 12:07:29 -040067
68 // Returns the exponent of the floating point number f.
69 // Assumes IEEE 754
70 rr::RValue<sw::SIMD::Int> Exponent(rr::RValue<sw::SIMD::Float> f)
71 {
72 auto v = rr::As<sw::SIMD::UInt>(f);
73 return (sw::SIMD::Int((v >> sw::SIMD::UInt(23)) & sw::SIMD::UInt(0xFF)) - sw::SIMD::Int(126));
74 }
Ben Claytonee10bcf2019-04-09 17:01:01 -040075
76 // Returns y if y < x; otherwise result is x.
77 // If one operand is a NaN, the other operand is the result.
78 // If both operands are NaN, the result is a NaN.
79 rr::RValue<sw::SIMD::Float> NMin(rr::RValue<sw::SIMD::Float> const &x, rr::RValue<sw::SIMD::Float> const &y)
80 {
81 using namespace rr;
82 auto xIsNan = IsNan(x);
83 auto yIsNan = IsNan(y);
84 return As<sw::SIMD::Float>(
85 // If neither are NaN, return min
86 ((~xIsNan & ~yIsNan) & As<sw::SIMD::Int>(Min(x, y))) |
87 // If one operand is a NaN, the other operand is the result
88 // If both operands are NaN, the result is a NaN.
89 ((~xIsNan & yIsNan) & As<sw::SIMD::Int>(x)) |
90 (( xIsNan ) & As<sw::SIMD::Int>(y)));
91 }
Ben Clayton02de7e02019-04-09 17:01:26 -040092
93 // Returns y if y > x; otherwise result is x.
94 // If one operand is a NaN, the other operand is the result.
95 // If both operands are NaN, the result is a NaN.
96 rr::RValue<sw::SIMD::Float> NMax(rr::RValue<sw::SIMD::Float> const &x, rr::RValue<sw::SIMD::Float> const &y)
97 {
98 using namespace rr;
99 auto xIsNan = IsNan(x);
100 auto yIsNan = IsNan(y);
101 return As<sw::SIMD::Float>(
102 // If neither are NaN, return max
103 ((~xIsNan & ~yIsNan) & As<sw::SIMD::Int>(Max(x, y))) |
104 // If one operand is a NaN, the other operand is the result
105 // If both operands are NaN, the result is a NaN.
106 ((~xIsNan & yIsNan) & As<sw::SIMD::Int>(x)) |
107 (( xIsNan ) & As<sw::SIMD::Int>(y)));
108 }
Ben Clayton1fb633c2019-04-09 17:24:59 -0400109
110 // Returns the determinant of a 2x2 matrix.
111 rr::RValue<sw::SIMD::Float> Determinant(
112 rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b,
113 rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d)
114 {
115 return a*d - b*c;
116 }
117
118 // Returns the determinant of a 3x3 matrix.
119 rr::RValue<sw::SIMD::Float> Determinant(
120 rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c,
121 rr::RValue<sw::SIMD::Float> const &d, rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f,
122 rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, rr::RValue<sw::SIMD::Float> const &i)
123 {
124 return a*e*i + b*f*g + c*d*h - c*e*g - b*d*i - a*f*h;
125 }
126
127 // Returns the determinant of a 4x4 matrix.
128 rr::RValue<sw::SIMD::Float> Determinant(
129 rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d,
130 rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h,
131 rr::RValue<sw::SIMD::Float> const &i, rr::RValue<sw::SIMD::Float> const &j, rr::RValue<sw::SIMD::Float> const &k, rr::RValue<sw::SIMD::Float> const &l,
132 rr::RValue<sw::SIMD::Float> const &m, rr::RValue<sw::SIMD::Float> const &n, rr::RValue<sw::SIMD::Float> const &o, rr::RValue<sw::SIMD::Float> const &p)
133 {
134 return a * Determinant(f, g, h,
135 j, k, l,
136 n, o, p) -
137 b * Determinant(e, g, h,
138 i, k, l,
139 m, o, p) +
140 c * Determinant(e, f, h,
141 i, j, l,
142 m, n, p) -
143 d * Determinant(e, f, g,
144 i, j, k,
145 m, n, o);
146 }
Ben Clayton445a44a2019-04-10 16:37:19 -0400147
148 // Returns the inverse of a 2x2 matrix.
149 std::array<rr::RValue<sw::SIMD::Float>, 4> MatrixInverse(
150 rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b,
151 rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d)
152 {
153 auto s = sw::SIMD::Float(1.0f) / Determinant(a, b, c, d);
154 return {{s*d, -s*b, -s*c, s*a}};
155 }
156
157 // Returns the inverse of a 3x3 matrix.
158 std::array<rr::RValue<sw::SIMD::Float>, 9> MatrixInverse(
159 rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c,
160 rr::RValue<sw::SIMD::Float> const &d, rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f,
161 rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, rr::RValue<sw::SIMD::Float> const &i)
162 {
163 auto s = sw::SIMD::Float(1.0f) / Determinant(
164 a, b, c,
165 d, e, f,
166 g, h, i); // TODO: duplicate arithmetic calculating the det and below.
167
168 return {{
169 s * (e*i - f*h), s * (c*h - b*i), s * (b*f - c*e),
170 s * (f*g - d*i), s * (a*i - c*g), s * (c*d - a*f),
171 s * (d*h - e*g), s * (b*g - a*h), s * (a*e - b*d),
172 }};
173 }
174
175 // Returns the inverse of a 4x4 matrix.
176 std::array<rr::RValue<sw::SIMD::Float>, 16> MatrixInverse(
177 rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d,
178 rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h,
179 rr::RValue<sw::SIMD::Float> const &i, rr::RValue<sw::SIMD::Float> const &j, rr::RValue<sw::SIMD::Float> const &k, rr::RValue<sw::SIMD::Float> const &l,
180 rr::RValue<sw::SIMD::Float> const &m, rr::RValue<sw::SIMD::Float> const &n, rr::RValue<sw::SIMD::Float> const &o, rr::RValue<sw::SIMD::Float> const &p)
181 {
182 auto s = sw::SIMD::Float(1.0f) / Determinant(
183 a, b, c, d,
184 e, f, g, h,
185 i, j, k, l,
186 m, n, o, p); // TODO: duplicate arithmetic calculating the det and below.
187
188 auto kplo = k*p - l*o, jpln = j*p - l*n, jokn = j*o - k*n;
189 auto gpho = g*p - h*o, fphn = f*p - h*n, fogn = f*o - g*n;
190 auto glhk = g*l - h*k, flhj = f*l - h*j, fkgj = f*k - g*j;
191 auto iplm = i*p - l*m, iokm = i*o - k*m, ephm = e*p - h*m;
192 auto eogm = e*o - g*m, elhi = e*l - h*i, ekgi = e*k - g*i;
193 auto injm = i*n - j*m, enfm = e*n - f*m, ejfi = e*j - f*i;
194
195 return {{
196 s * ( f * kplo - g * jpln + h * jokn),
197 s * (-b * kplo + c * jpln - d * jokn),
198 s * ( b * gpho - c * fphn + d * fogn),
199 s * (-b * glhk + c * flhj - d * fkgj),
200
201 s * (-e * kplo + g * iplm - h * iokm),
202 s * ( a * kplo - c * iplm + d * iokm),
203 s * (-a * gpho + c * ephm - d * eogm),
204 s * ( a * glhk - c * elhi + d * ekgi),
205
206 s * ( e * jpln - f * iplm + h * injm),
207 s * (-a * jpln + b * iplm - d * injm),
208 s * ( a * fphn - b * ephm + d * enfm),
209 s * (-a * flhj + b * elhi - d * ejfi),
210
211 s * (-e * jokn + f * iokm - g * injm),
212 s * ( a * jokn - b * iokm + c * injm),
213 s * (-a * fogn + b * eogm - c * enfm),
214 s * ( a * fkgj - b * ekgi + c * ejfi),
215 }};
216 }
Ben Clayton9e4bc1b2019-04-16 16:52:02 -0400217
Chris Forbes24466042019-04-22 10:54:23 -0700218
Ben Clayton9e4bc1b2019-04-16 16:52:02 -0400219 sw::SIMD::Pointer interleaveByLane(sw::SIMD::Pointer p)
220 {
221 p *= sw::SIMD::Width;
222 p.staticOffsets[0] += 0 * sizeof(float);
223 p.staticOffsets[1] += 1 * sizeof(float);
224 p.staticOffsets[2] += 2 * sizeof(float);
225 p.staticOffsets[3] += 3 * sizeof(float);
226 return p;
227 }
228
Chris Forbes24466042019-04-22 10:54:23 -0700229 VkFormat SpirvFormatToVulkanFormat(spv::ImageFormat format)
230 {
231 switch (format)
232 {
233 case spv::ImageFormatRgba32f: return VK_FORMAT_R32G32B32A32_SFLOAT;
234 case spv::ImageFormatRgba32i: return VK_FORMAT_R32G32B32A32_SINT;
235 case spv::ImageFormatRgba32ui: return VK_FORMAT_R32G32B32A32_UINT;
236 case spv::ImageFormatR32f: return VK_FORMAT_R32_SFLOAT;
237 case spv::ImageFormatR32i: return VK_FORMAT_R32_SINT;
238 case spv::ImageFormatR32ui: return VK_FORMAT_R32_UINT;
239 case spv::ImageFormatRgba8: return VK_FORMAT_R8G8B8A8_UNORM;
240 case spv::ImageFormatRgba8Snorm: return VK_FORMAT_R8G8B8A8_SNORM;
241 case spv::ImageFormatRgba8i: return VK_FORMAT_R8G8B8A8_SINT;
242 case spv::ImageFormatRgba8ui: return VK_FORMAT_R8G8B8A8_UINT;
243 case spv::ImageFormatRgba16f: return VK_FORMAT_R16G16B16A16_SFLOAT;
244 case spv::ImageFormatRgba16i: return VK_FORMAT_R16G16B16A16_SINT;
245 case spv::ImageFormatRgba16ui: return VK_FORMAT_R16G16B16A16_UINT;
246
247 default:
248 UNIMPLEMENTED("SPIR-V ImageFormat %u", format);
249 return VK_FORMAT_UNDEFINED;
250 }
251 }
252
Chris Forbesa32d6302019-04-26 14:19:04 -0700253 sw::SIMD::Float sRGBtoLinear(sw::SIMD::Float c)
254 {
255 sw::SIMD::Float lc = c * sw::SIMD::Float(1.0f / 12.92f);
256 sw::SIMD::Float ec = sw::power((c + sw::SIMD::Float(0.055f)) * sw::SIMD::Float(1.0f / 1.055f), sw::SIMD::Float(2.4f));
257
258 sw::SIMD::Int linear = CmpLT(c, sw::SIMD::Float(0.04045f));
259
260 sw::SIMD::Float s = c;
261 s.xyz = rr::As<sw::SIMD::Float>((linear & rr::As<sw::SIMD::Int>(lc)) | (~linear & rr::As<sw::SIMD::Int>(ec))); // FIXME: IfThenElse()
262
263 return s;
264 }
265
Ben Clayton9e4bc1b2019-04-16 16:52:02 -0400266} // anonymous namespace
Ben Claytone747b3c2019-03-21 19:35:15 +0000267
Chris Forbesaf4ed532018-12-06 18:33:27 -0800268namespace sw
269{
Ben Clayton9e4bc1b2019-04-16 16:52:02 -0400270 namespace SIMD
271 {
272
273 template<typename T>
274 T Load(Pointer ptr, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */)
275 {
276 using EL = typename Element<T>::type;
Ben Clayton9e4bc1b2019-04-16 16:52:02 -0400277 auto offsets = ptr.offsets();
278 mask &= CmpLT(offsets + SIMD::Int(sizeof(float) - 1), SIMD::Int(ptr.limit)); // Disable OOB reads.
Ben Clayton0fc611f2019-04-18 11:23:27 -0400279 if (!atomic && order == std::memory_order_relaxed)
Ben Clayton9e4bc1b2019-04-16 16:52:02 -0400280 {
Ben Clayton0fc611f2019-04-18 11:23:27 -0400281 return rr::Gather(rr::Pointer<EL>(ptr.base), offsets, mask, sizeof(float));
Ben Clayton9e4bc1b2019-04-16 16:52:02 -0400282 }
Ben Clayton0fc611f2019-04-18 11:23:27 -0400283 else
Ben Clayton9e4bc1b2019-04-16 16:52:02 -0400284 {
Ben Clayton0fc611f2019-04-18 11:23:27 -0400285 T out;
286 auto anyLanesDisabled = AnyFalse(mask);
287 If(ptr.hasEqualOffsets() && !anyLanesDisabled)
Ben Clayton9e4bc1b2019-04-16 16:52:02 -0400288 {
Ben Clayton0fc611f2019-04-18 11:23:27 -0400289 // Load one, replicate.
290 auto offset = Extract(offsets, 0);
291 out = T(rr::Load(rr::Pointer<EL>(&ptr.base[offset]), sizeof(float), atomic, order));
292 }
293 Else If(ptr.hasSequentialOffsets() && !anyLanesDisabled)
294 {
295 // Load all elements in a single SIMD instruction.
296 auto offset = Extract(offsets, 0);
297 out = rr::Load(rr::Pointer<T>(&ptr.base[offset]), sizeof(float), atomic, order);
298 }
299 Else
300 {
301 // Divergent offsets or masked lanes.
302 out = T(0);
303 for (int i = 0; i < SIMD::Width; i++)
Ben Clayton9e4bc1b2019-04-16 16:52:02 -0400304 {
Ben Clayton0fc611f2019-04-18 11:23:27 -0400305 If(Extract(mask, i) != 0)
306 {
307 auto offset = Extract(offsets, i);
308 auto el = rr::Load(rr::Pointer<EL>(&ptr.base[offset]), sizeof(float), atomic, order);
309 out = Insert(out, el, i);
310 }
Ben Clayton9e4bc1b2019-04-16 16:52:02 -0400311 }
312 }
Ben Clayton0fc611f2019-04-18 11:23:27 -0400313 return out;
Ben Clayton9e4bc1b2019-04-16 16:52:02 -0400314 }
Ben Clayton9e4bc1b2019-04-16 16:52:02 -0400315 }
316
317 template<typename T>
318 void Store(Pointer ptr, T val, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */)
319 {
320 using EL = typename Element<T>::type;
321 auto offsets = ptr.offsets();
322 mask &= CmpLT(offsets + SIMD::Int(sizeof(float) - 1), SIMD::Int(ptr.limit)); // Disable OOB reads.
Ben Clayton0fc611f2019-04-18 11:23:27 -0400323 if (!atomic && order == std::memory_order_relaxed)
Ben Clayton9e4bc1b2019-04-16 16:52:02 -0400324 {
Ben Clayton0fc611f2019-04-18 11:23:27 -0400325 return rr::Scatter(rr::Pointer<EL>(ptr.base), val, offsets, mask, sizeof(float));
Ben Clayton9e4bc1b2019-04-16 16:52:02 -0400326 }
Ben Clayton0fc611f2019-04-18 11:23:27 -0400327 else
Ben Clayton9e4bc1b2019-04-16 16:52:02 -0400328 {
Ben Clayton0fc611f2019-04-18 11:23:27 -0400329 auto anyLanesDisabled = AnyFalse(mask);
330 If(ptr.hasSequentialOffsets() && !anyLanesDisabled)
Ben Clayton9e4bc1b2019-04-16 16:52:02 -0400331 {
Ben Clayton0fc611f2019-04-18 11:23:27 -0400332 // Store all elements in a single SIMD instruction.
333 auto offset = Extract(offsets, 0);
334 Store(val, rr::Pointer<T>(&ptr.base[offset]), sizeof(float), atomic, order);
335 }
336 Else
337 {
338 // Divergent offsets or masked lanes.
339 for (int i = 0; i < SIMD::Width; i++)
Ben Clayton9e4bc1b2019-04-16 16:52:02 -0400340 {
Ben Clayton0fc611f2019-04-18 11:23:27 -0400341 If(Extract(mask, i) != 0)
342 {
343 auto offset = Extract(offsets, i);
344 rr::Store(Extract(val, i), rr::Pointer<EL>(&ptr.base[offset]), sizeof(float), atomic, order);
345 }
Ben Clayton9e4bc1b2019-04-16 16:52:02 -0400346 }
347 }
348 }
349 }
350
351 } // namespace SIMD
352
// Source of the per-shader serialID: the SpirvShader constructor initializes
// serialID with the current value of this counter and post-increments it.
// NOTE(review): volatile does not make that read-modify-write atomic —
// confirm shaders are never constructed concurrently, or that construction
// is serialized elsewhere.
volatile int SpirvShader::serialCounter = 1; // Start at 1, 0 is invalid shader.
354
Ben Clayton60f15ec2019-05-09 17:50:01 +0100355 SpirvShader::SpirvShader(
356 VkPipelineShaderStageCreateInfo const *createInfo,
357 InsnStore const &insns,
358 vk::RenderPass *renderPass,
359 uint32_t subpassIndex)
360 : insns{insns}, inputs{MAX_INTERFACE_COMPONENTS},
361 outputs{MAX_INTERFACE_COMPONENTS},
362 serialID{serialCounter++}, modes{}
Chris Forbesaf4ed532018-12-06 18:33:27 -0800363 {
Ben Clayton45faa082019-03-05 13:20:40 +0000364 ASSERT(insns.size() > 0);
365
Chris Forbes24466042019-04-22 10:54:23 -0700366 if (renderPass != VK_NULL_HANDLE)
367 {
368 // capture formats of any input attachments present
369 auto subpass = renderPass->getSubpass(subpassIndex);
370 inputAttachmentFormats.reserve(subpass.inputAttachmentCount);
371 for (auto i = 0u; i < subpass.inputAttachmentCount; i++)
372 {
373 auto attachmentIndex = subpass.pInputAttachments[i].attachment;
374 inputAttachmentFormats.push_back(attachmentIndex != VK_ATTACHMENT_UNUSED
375 ? renderPass->getAttachment(attachmentIndex).format : VK_FORMAT_UNDEFINED);
376 }
377 }
378
Chris Forbesaf4ed532018-12-06 18:33:27 -0800379 // Simplifying assumptions (to be satisfied by earlier transformations)
Chris Forbesaf4ed532018-12-06 18:33:27 -0800380 // - The only input/output OpVariables present are those used by the entrypoint
381
Ben Clayton60f15ec2019-05-09 17:50:01 +0100382 Object::ID entryPointFunctionId;
Ben Clayton9b156612019-03-13 19:48:31 +0000383 Block::ID currentBlock;
384 InsnIterator blockStart;
Chris Forbese57f10e2019-03-04 10:53:07 -0800385
Chris Forbes4a979dc2019-01-17 09:36:46 -0800386 for (auto insn : *this)
387 {
Nicolas Capens125dba02019-04-24 02:03:22 -0400388 spv::Op opcode = insn.opcode();
389
390 switch (opcode)
Chris Forbes4a979dc2019-01-17 09:36:46 -0800391 {
Ben Clayton60f15ec2019-05-09 17:50:01 +0100392 case spv::OpEntryPoint:
393 {
394 auto executionModel = spv::ExecutionModel(insn.word(1));
395 auto id = Object::ID(insn.word(2));
396 auto name = insn.string(3);
397 auto stage = executionModelToStage(executionModel);
398 if (stage == createInfo->stage && strcmp(name, createInfo->pName) == 0)
399 {
400 ASSERT_MSG(entryPointFunctionId == 0, "Duplicate entry point with name '%s' and stage %d", name, int(stage));
401 entryPointFunctionId = id;
402 }
403 break;
404 }
405
Chris Forbes4a979dc2019-01-17 09:36:46 -0800406 case spv::OpExecutionMode:
407 ProcessExecutionMode(insn);
408 break;
Chris Forbesaf4ed532018-12-06 18:33:27 -0800409
Chris Forbesc25b8072018-12-10 15:10:39 -0800410 case spv::OpDecorate:
411 {
Ben Claytonab51bbf2019-02-20 14:36:27 +0000412 TypeOrObjectID targetId = insn.word(1);
Chris Forbes93f70b32019-02-10 21:26:27 +0000413 auto decoration = static_cast<spv::Decoration>(insn.word(2));
Nicolas Capens82eb22e2019-04-10 01:15:43 -0400414 uint32_t value = insn.wordCount() > 3 ? insn.word(3) : 0;
415
416 decorations[targetId].Apply(decoration, value);
417
418 switch(decoration)
419 {
420 case spv::DecorationDescriptorSet:
421 descriptorDecorations[targetId].DescriptorSet = value;
422 break;
423 case spv::DecorationBinding:
424 descriptorDecorations[targetId].Binding = value;
425 break;
Chris Forbes24466042019-04-22 10:54:23 -0700426 case spv::DecorationInputAttachmentIndex:
427 descriptorDecorations[targetId].InputAttachmentIndex = value;
428 break;
Nicolas Capens82eb22e2019-04-10 01:15:43 -0400429 default:
430 // Only handling descriptor decorations here.
431 break;
432 }
Chris Forbes93f70b32019-02-10 21:26:27 +0000433
434 if (decoration == spv::DecorationCentroid)
435 modes.NeedsCentroid = true;
Chris Forbesc25b8072018-12-10 15:10:39 -0800436 break;
437 }
438
439 case spv::OpMemberDecorate:
440 {
Ben Claytonaf973b62019-03-13 18:19:20 +0000441 Type::ID targetId = insn.word(1);
Chris Forbesc25b8072018-12-10 15:10:39 -0800442 auto memberIndex = insn.word(2);
Nicolas Capens82eb22e2019-04-10 01:15:43 -0400443 auto decoration = static_cast<spv::Decoration>(insn.word(3));
444 uint32_t value = insn.wordCount() > 4 ? insn.word(4) : 0;
445
Chris Forbesc25b8072018-12-10 15:10:39 -0800446 auto &d = memberDecorations[targetId];
447 if (memberIndex >= d.size())
448 d.resize(memberIndex + 1); // on demand; exact size would require another pass...
Chris Forbes58228822019-04-17 12:51:29 -0700449
Nicolas Capens82eb22e2019-04-10 01:15:43 -0400450 d[memberIndex].Apply(decoration, value);
Chris Forbes93f70b32019-02-10 21:26:27 +0000451
452 if (decoration == spv::DecorationCentroid)
453 modes.NeedsCentroid = true;
Chris Forbesc25b8072018-12-10 15:10:39 -0800454 break;
455 }
456
457 case spv::OpDecorationGroup:
458 // Nothing to do here. We don't need to record the definition of the group; we'll just have
459 // the bundle of decorations float around. If we were to ever walk the decorations directly,
460 // we might think about introducing this as a real Object.
461 break;
462
463 case spv::OpGroupDecorate:
464 {
Nicolas Capens82eb22e2019-04-10 01:15:43 -0400465 uint32_t group = insn.word(1);
466 auto const &groupDecorations = decorations[group];
467 auto const &descriptorGroupDecorations = descriptorDecorations[group];
Chris Forbesc25b8072018-12-10 15:10:39 -0800468 for (auto i = 2u; i < insn.wordCount(); i++)
469 {
Nicolas Capens82eb22e2019-04-10 01:15:43 -0400470 // Remaining operands are targets to apply the group to.
471 uint32_t target = insn.word(i);
472 decorations[target].Apply(groupDecorations);
473 descriptorDecorations[target].Apply(descriptorGroupDecorations);
Chris Forbesc25b8072018-12-10 15:10:39 -0800474 }
Nicolas Capens82eb22e2019-04-10 01:15:43 -0400475
Chris Forbesc25b8072018-12-10 15:10:39 -0800476 break;
477 }
478
479 case spv::OpGroupMemberDecorate:
480 {
481 auto const &srcDecorations = decorations[insn.word(1)];
482 for (auto i = 2u; i < insn.wordCount(); i += 2)
483 {
484 // remaining operands are pairs of <id>, literal for members to apply to.
485 auto &d = memberDecorations[insn.word(i)];
486 auto memberIndex = insn.word(i + 1);
487 if (memberIndex >= d.size())
488 d.resize(memberIndex + 1); // on demand resize, see above...
489 d[memberIndex].Apply(srcDecorations);
490 }
491 break;
492 }
493
Chris Forbese57f10e2019-03-04 10:53:07 -0800494 case spv::OpLabel:
Ben Clayton9b156612019-03-13 19:48:31 +0000495 {
496 ASSERT(currentBlock.value() == 0);
497 currentBlock = Block::ID(insn.word(1));
498 blockStart = insn;
Chris Forbese57f10e2019-03-04 10:53:07 -0800499 break;
Ben Clayton9b156612019-03-13 19:48:31 +0000500 }
Chris Forbese57f10e2019-03-04 10:53:07 -0800501
Ben Clayton9b156612019-03-13 19:48:31 +0000502 // Branch Instructions (subset of Termination Instructions):
503 case spv::OpBranch:
504 case spv::OpBranchConditional:
505 case spv::OpSwitch:
Chris Forbese57f10e2019-03-04 10:53:07 -0800506 case spv::OpReturn:
Ben Clayton9b156612019-03-13 19:48:31 +0000507 // fallthrough
508
509 // Termination instruction:
510 case spv::OpKill:
511 case spv::OpUnreachable:
512 {
513 ASSERT(currentBlock.value() != 0);
514 auto blockEnd = insn; blockEnd++;
515 blocks[currentBlock] = Block(blockStart, blockEnd);
516 currentBlock = Block::ID(0);
517
Nicolas Capens125dba02019-04-24 02:03:22 -0400518 if (opcode == spv::OpKill)
Ben Clayton9b156612019-03-13 19:48:31 +0000519 {
520 modes.ContainsKill = true;
521 }
Chris Forbese57f10e2019-03-04 10:53:07 -0800522 break;
Ben Clayton9b156612019-03-13 19:48:31 +0000523 }
Chris Forbese57f10e2019-03-04 10:53:07 -0800524
Ben Claytone747b3c2019-03-21 19:35:15 +0000525 case spv::OpLoopMerge:
Ben Clayton9fd02e02019-03-21 18:47:15 +0000526 case spv::OpSelectionMerge:
527 break; // Nothing to do in analysis pass.
528
Chris Forbes4a979dc2019-01-17 09:36:46 -0800529 case spv::OpTypeVoid:
530 case spv::OpTypeBool:
531 case spv::OpTypeInt:
532 case spv::OpTypeFloat:
533 case spv::OpTypeVector:
534 case spv::OpTypeMatrix:
535 case spv::OpTypeImage:
536 case spv::OpTypeSampler:
537 case spv::OpTypeSampledImage:
538 case spv::OpTypeArray:
539 case spv::OpTypeRuntimeArray:
540 case spv::OpTypeStruct:
541 case spv::OpTypePointer:
542 case spv::OpTypeFunction:
Ben Clayton0bb83b82019-02-26 11:41:07 +0000543 DeclareType(insn);
Chris Forbes4a979dc2019-01-17 09:36:46 -0800544 break;
Chris Forbes296aa252018-12-27 11:48:21 -0800545
Chris Forbes4a979dc2019-01-17 09:36:46 -0800546 case spv::OpVariable:
547 {
Ben Claytonaf973b62019-03-13 18:19:20 +0000548 Type::ID typeId = insn.word(1);
549 Object::ID resultId = insn.word(2);
Chris Forbes4a979dc2019-01-17 09:36:46 -0800550 auto storageClass = static_cast<spv::StorageClass>(insn.word(3));
Chris Forbes296aa252018-12-27 11:48:21 -0800551
Chris Forbes4a979dc2019-01-17 09:36:46 -0800552 auto &object = defs[resultId];
Ben Clayton1d514f32019-04-19 16:11:18 -0400553 object.kind = Object::Kind::Pointer;
Chris Forbes4a979dc2019-01-17 09:36:46 -0800554 object.definition = insn;
Ben Clayton9a162482019-02-25 11:54:43 +0000555 object.type = typeId;
Chris Forbesc25b8072018-12-10 15:10:39 -0800556
Ben Claytonecd38482019-04-19 17:11:08 -0400557 ASSERT(getType(typeId).definition.opcode() == spv::OpTypePointer);
Ben Claytonefec1b92019-03-05 17:38:16 +0000558 ASSERT(getType(typeId).storageClass == storageClass);
559
560 switch (storageClass)
Chris Forbesc25b8072018-12-10 15:10:39 -0800561 {
Ben Claytonefec1b92019-03-05 17:38:16 +0000562 case spv::StorageClassInput:
563 case spv::StorageClassOutput:
Ben Claytona1924732019-02-28 18:42:10 +0000564 ProcessInterfaceVariable(object);
Ben Claytonefec1b92019-03-05 17:38:16 +0000565 break;
Ben Clayton484e08e2019-04-05 12:11:39 +0100566
Ben Claytonefec1b92019-03-05 17:38:16 +0000567 case spv::StorageClassUniform:
568 case spv::StorageClassStorageBuffer:
Ben Clayton6b511342019-04-05 12:12:30 +0100569 object.kind = Object::Kind::DescriptorSet;
570 break;
571
Chris Forbesa30de542019-03-18 18:51:55 -0700572 case spv::StorageClassPushConstant:
Ben Claytonefec1b92019-03-05 17:38:16 +0000573 case spv::StorageClassPrivate:
574 case spv::StorageClassFunction:
Ben Claytonefec1b92019-03-05 17:38:16 +0000575 case spv::StorageClassUniformConstant:
Chris Forbesfa82c342019-04-26 16:42:38 -0700576 break; // Correctly handled.
Nicolas Capens7d867272019-04-08 22:51:08 -0400577
Ben Claytonefec1b92019-03-05 17:38:16 +0000578 case spv::StorageClassWorkgroup:
Ben Claytonecd38482019-04-19 17:11:08 -0400579 {
580 auto &elTy = getType(getType(typeId).element);
581 auto sizeInBytes = elTy.sizeInComponents * sizeof(float);
582 workgroupMemory.allocate(resultId, sizeInBytes);
583 object.kind = Object::Kind::Pointer;
584 break;
585 }
Ben Claytonefec1b92019-03-05 17:38:16 +0000586 case spv::StorageClassAtomicCounter:
587 case spv::StorageClassImage:
588 UNIMPLEMENTED("StorageClass %d not yet implemented", (int)storageClass);
589 break;
590
Ben Clayton92797c22019-04-25 10:44:03 +0100591 case spv::StorageClassCrossWorkgroup:
592 UNSUPPORTED("SPIR-V OpenCL Execution Model (StorageClassCrossWorkgroup)");
593 break;
594
595 case spv::StorageClassGeneric:
596 UNSUPPORTED("SPIR-V GenericPointer Capability (StorageClassGeneric)");
597 break;
598
Ben Claytonefec1b92019-03-05 17:38:16 +0000599 default:
Nicolas Capens29090852019-03-19 16:22:35 -0400600 UNREACHABLE("Unexpected StorageClass %d", storageClass); // See Appendix A of the Vulkan spec.
Ben Claytonefec1b92019-03-05 17:38:16 +0000601 break;
Chris Forbesc25b8072018-12-10 15:10:39 -0800602 }
Chris Forbes4a979dc2019-01-17 09:36:46 -0800603 break;
604 }
Chris Forbes296aa252018-12-27 11:48:21 -0800605
Chris Forbesbc3a0ee2018-12-27 16:02:58 -0800606 case spv::OpConstant:
Chris Forbesea81ab72019-05-14 15:20:33 -0700607 case spv::OpSpecConstant:
Chris Forbes1ca8acd2019-02-20 13:00:54 -0800608 CreateConstant(insn).constantValue[0] = insn.word(3);
609 break;
Chris Forbesbc3a0ee2018-12-27 16:02:58 -0800610 case spv::OpConstantFalse:
Chris Forbesea81ab72019-05-14 15:20:33 -0700611 case spv::OpSpecConstantFalse:
Chris Forbes1ca8acd2019-02-20 13:00:54 -0800612 CreateConstant(insn).constantValue[0] = 0; // represent boolean false as zero
613 break;
Chris Forbesbc3a0ee2018-12-27 16:02:58 -0800614 case spv::OpConstantTrue:
Chris Forbesea81ab72019-05-14 15:20:33 -0700615 case spv::OpSpecConstantTrue:
Chris Forbes1ca8acd2019-02-20 13:00:54 -0800616 CreateConstant(insn).constantValue[0] = ~0u; // represent boolean true as all bits set
617 break;
Chris Forbesbc3a0ee2018-12-27 16:02:58 -0800618 case spv::OpConstantNull:
Chris Forbes0e712412019-03-18 19:31:16 -0700619 case spv::OpUndef:
Chris Forbesbc3a0ee2018-12-27 16:02:58 -0800620 {
Chris Forbes0e712412019-03-18 19:31:16 -0700621 // TODO: consider a real LLVM-level undef. For now, zero is a perfectly good value.
Chris Forbes1ca8acd2019-02-20 13:00:54 -0800622 // OpConstantNull forms a constant of arbitrary type, all zeros.
Ben Clayton9a162482019-02-25 11:54:43 +0000623 auto &object = CreateConstant(insn);
624 auto &objectTy = getType(object.type);
625 for (auto i = 0u; i < objectTy.sizeInComponents; i++)
Chris Forbes1ca8acd2019-02-20 13:00:54 -0800626 {
627 object.constantValue[i] = 0;
628 }
629 break;
630 }
631 case spv::OpConstantComposite:
Chris Forbesea81ab72019-05-14 15:20:33 -0700632 case spv::OpSpecConstantComposite:
Chris Forbes1ca8acd2019-02-20 13:00:54 -0800633 {
634 auto &object = CreateConstant(insn);
635 auto offset = 0u;
636 for (auto i = 0u; i < insn.wordCount() - 3; i++)
637 {
Ben Clayton9a162482019-02-25 11:54:43 +0000638 auto &constituent = getObject(insn.word(i + 3));
639 auto &constituentTy = getType(constituent.type);
640 for (auto j = 0u; j < constituentTy.sizeInComponents; j++)
Chris Forbes1ca8acd2019-02-20 13:00:54 -0800641 object.constantValue[offset++] = constituent.constantValue[j];
642 }
Ben Clayton62758f52019-03-13 14:18:58 +0000643
644 auto objectId = Object::ID(insn.word(2));
645 auto decorationsIt = decorations.find(objectId);
646 if (decorationsIt != decorations.end() &&
647 decorationsIt->second.BuiltIn == spv::BuiltInWorkgroupSize)
648 {
649 // https://www.khronos.org/registry/vulkan/specs/1.1/html/vkspec.html#interfaces-builtin-variables :
650 // Decorating an object with the WorkgroupSize built-in
651 // decoration will make that object contain the dimensions
652 // of a local workgroup. If an object is decorated with the
653 // WorkgroupSize decoration, this must take precedence over
654 // any execution mode set for LocalSize.
655 // The object decorated with WorkgroupSize must be declared
656 // as a three-component vector of 32-bit integers.
657 ASSERT(getType(object.type).sizeInComponents == 3);
658 modes.WorkgroupSizeX = object.constantValue[0];
659 modes.WorkgroupSizeY = object.constantValue[1];
660 modes.WorkgroupSizeZ = object.constantValue[2];
661 }
Chris Forbesbc3a0ee2018-12-27 16:02:58 -0800662 break;
663 }
Chris Forbesea81ab72019-05-14 15:20:33 -0700664 case spv::OpSpecConstantOp:
665 EvalSpecConstantOp(insn);
666 break;
Chris Forbesbc3a0ee2018-12-27 16:02:58 -0800667
Chris Forbesbde34082018-12-28 12:03:10 -0800668 case spv::OpCapability:
Ben Clayton9b156612019-03-13 19:48:31 +0000669 break; // Various capabilities will be declared, but none affect our code generation at this point.
Chris Forbesbde34082018-12-28 12:03:10 -0800670 case spv::OpMemoryModel:
Ben Clayton9b156612019-03-13 19:48:31 +0000671 break; // Memory model does not affect our code generation until we decide to do Vulkan Memory Model support.
672
Chris Forbes7edf5342019-02-10 22:41:21 +0000673 case spv::OpFunction:
Ben Clayton60f15ec2019-05-09 17:50:01 +0100674 {
675 auto functionId = Object::ID(insn.word(2));
676 if (functionId == entryPointFunctionId)
Ben Clayton9b156612019-03-13 19:48:31 +0000677 {
Ben Clayton60f15ec2019-05-09 17:50:01 +0100678 // Scan forward to find the function's label.
679 for (auto it = insn; it != end() && entryPointBlockId == 0; it++)
Ben Clayton9b156612019-03-13 19:48:31 +0000680 {
Ben Clayton60f15ec2019-05-09 17:50:01 +0100681 switch (it.opcode())
682 {
683 case spv::OpFunction:
684 case spv::OpFunctionParameter:
685 break;
686 case spv::OpLabel:
687 entryPointBlockId = Block::ID(it.word(1));
688 break;
689 default:
690 WARN("Unexpected opcode '%s' following OpFunction", OpcodeName(it.opcode()).c_str());
691 }
Ben Clayton9b156612019-03-13 19:48:31 +0000692 }
693 }
Ben Clayton60f15ec2019-05-09 17:50:01 +0100694 else
695 {
696 // All non-entry point functions should be inlined into an
697 // entry point function.
698 // This isn't the target entry point, so must be another
699 // entry point that we are not interested in. Just skip it.
700 for (; insn != end() && insn.opcode() != spv::OpFunctionEnd; insn++) {}
701 }
702
Ben Clayton9b156612019-03-13 19:48:31 +0000703 break;
Ben Clayton60f15ec2019-05-09 17:50:01 +0100704 }
Chris Forbes7edf5342019-02-10 22:41:21 +0000705 case spv::OpFunctionEnd:
706 // Due to preprocessing, the entrypoint and its function provide no value.
707 break;
708 case spv::OpExtInstImport:
Ben Clayton92797c22019-04-25 10:44:03 +0100709 {
Chris Forbes7edf5342019-02-10 22:41:21 +0000710 // We will only support the GLSL 450 extended instruction set, so no point in tracking the ID we assign it.
711 // Valid shaders will not attempt to import any other instruction sets.
Ben Clayton60f15ec2019-05-09 17:50:01 +0100712 auto ext = insn.string(2);
Ben Clayton92797c22019-04-25 10:44:03 +0100713 if (0 != strcmp("GLSL.std.450", ext))
Chris Forbes9667a5b2019-03-07 09:26:48 -0800714 {
Ben Clayton92797c22019-04-25 10:44:03 +0100715 UNSUPPORTED("SPIR-V Extension: %s", ext);
Chris Forbes9667a5b2019-03-07 09:26:48 -0800716 }
717 break;
Ben Clayton92797c22019-04-25 10:44:03 +0100718 }
Chris Forbes1776af72019-02-22 17:39:57 -0800719 case spv::OpName:
720 case spv::OpMemberName:
721 case spv::OpSource:
722 case spv::OpSourceContinued:
723 case spv::OpSourceExtension:
Chris Forbesf3a430d2019-03-08 07:51:39 -0800724 case spv::OpLine:
725 case spv::OpNoLine:
726 case spv::OpModuleProcessed:
727 case spv::OpString:
Chris Forbes1776af72019-02-22 17:39:57 -0800728 // No semantic impact
Chris Forbes7edf5342019-02-10 22:41:21 +0000729 break;
730
731 case spv::OpFunctionParameter:
732 case spv::OpFunctionCall:
Chris Forbes7edf5342019-02-10 22:41:21 +0000733 // These should have all been removed by preprocessing passes. If we see them here,
734 // our assumptions are wrong and we will probably generate wrong code.
Ben Clayton92797c22019-04-25 10:44:03 +0100735 UNREACHABLE("%s should have already been lowered.", OpcodeName(opcode).c_str());
Chris Forbes7edf5342019-02-10 22:41:21 +0000736 break;
737
Chris Forbes4d503052019-03-01 17:13:57 -0800738 case spv::OpFConvert:
Ben Clayton92797c22019-04-25 10:44:03 +0100739 UNSUPPORTED("SPIR-V Float16 or Float64 Capability (OpFConvert)");
Nicolas Capens7d867272019-04-08 22:51:08 -0400740 break;
741
Chris Forbes4d503052019-03-01 17:13:57 -0800742 case spv::OpSConvert:
Ben Clayton92797c22019-04-25 10:44:03 +0100743 UNSUPPORTED("SPIR-V Int16 or Int64 Capability (OpSConvert)");
744 break;
745
Chris Forbes4d503052019-03-01 17:13:57 -0800746 case spv::OpUConvert:
Ben Clayton92797c22019-04-25 10:44:03 +0100747 UNSUPPORTED("SPIR-V Int16 or Int64 Capability (OpUConvert)");
Chris Forbes4d503052019-03-01 17:13:57 -0800748 break;
749
Chris Forbesa71b8e92019-02-10 22:42:42 +0000750 case spv::OpLoad:
751 case spv::OpAccessChain:
Chris Forbes10fd6242019-03-15 12:27:34 -0700752 case spv::OpInBoundsAccessChain:
Chris Forbesfa82c342019-04-26 16:42:38 -0700753 case spv::OpSampledImage:
754 case spv::OpImage:
Nicolas Capens82eb22e2019-04-10 01:15:43 -0400755 {
756 // Propagate the descriptor decorations to the result.
757 Object::ID resultId = insn.word(2);
758 Object::ID pointerId = insn.word(3);
759 const auto &d = descriptorDecorations.find(pointerId);
760
761 if(d != descriptorDecorations.end())
762 {
763 descriptorDecorations[resultId] = d->second;
764 }
765
766 DefineResult(insn);
Chris Forbese6419ad2019-04-11 12:23:10 -0700767
Nicolas Capens125dba02019-04-24 02:03:22 -0400768 if (opcode == spv::OpAccessChain || opcode == spv::OpInBoundsAccessChain)
Chris Forbese6419ad2019-04-11 12:23:10 -0700769 {
770 Decorations dd{};
Chris Forbes3610ded2019-04-22 18:12:13 -0700771 ApplyDecorationsForAccessChain(&dd, &descriptorDecorations[resultId], pointerId, insn.wordCount() - 4, insn.wordPointer(4));
Chris Forbese6419ad2019-04-11 12:23:10 -0700772 // Note: offset is the one thing that does *not* propagate, as the access chain accounts for it.
773 dd.HasOffset = false;
774 decorations[resultId].Apply(dd);
775 }
Nicolas Capens82eb22e2019-04-10 01:15:43 -0400776 }
777 break;
778
Chris Forbesb97a9572019-02-21 16:51:42 -0800779 case spv::OpCompositeConstruct:
Chris Forbes1bc1acf2019-02-21 18:40:33 -0800780 case spv::OpCompositeInsert:
Chris Forbesb12846d2019-02-21 18:53:58 -0800781 case spv::OpCompositeExtract:
Chris Forbes83fc5442019-02-26 22:16:07 -0800782 case spv::OpVectorShuffle:
Chris Forbesfaed9d32019-03-15 10:31:08 -0700783 case spv::OpVectorTimesScalar:
Chris Forbes57e05b82019-03-28 09:16:20 +1300784 case spv::OpMatrixTimesScalar:
Chris Forbes06f4ed72019-03-28 09:53:20 +1300785 case spv::OpMatrixTimesVector:
Chris Forbesa563dd82019-03-28 10:32:55 +1300786 case spv::OpVectorTimesMatrix:
Chris Forbes51562f12019-03-28 19:08:39 -0700787 case spv::OpMatrixTimesMatrix:
Ben Clayton3ee52992019-04-08 11:01:23 -0400788 case spv::OpOuterProduct:
Ben Clayton620f7082019-04-08 11:12:08 -0400789 case spv::OpTranspose:
Chris Forbesfaed9d32019-03-15 10:31:08 -0700790 case spv::OpVectorExtractDynamic:
791 case spv::OpVectorInsertDynamic:
Nicolas Capens82eb22e2019-04-10 01:15:43 -0400792 // Unary ops
793 case spv::OpNot:
Ben Claytonb5bfa502019-04-08 14:26:36 -0400794 case spv::OpBitFieldInsert:
Ben Claytond86db952019-04-08 13:43:11 -0400795 case spv::OpBitFieldSExtract:
796 case spv::OpBitFieldUExtract:
Ben Claytond2a46432019-04-08 11:41:45 -0400797 case spv::OpBitReverse:
Ben Clayton1eb017d2019-04-08 11:32:09 -0400798 case spv::OpBitCount:
Ben Claytondd1e37e2019-02-28 19:59:15 +0000799 case spv::OpSNegate:
800 case spv::OpFNegate:
801 case spv::OpLogicalNot:
Ben Clayton64da4ae2019-04-19 12:34:06 -0400802 case spv::OpQuantizeToF16:
Nicolas Capens82eb22e2019-04-10 01:15:43 -0400803 // Binary ops
804 case spv::OpIAdd:
Ben Claytondd1e37e2019-02-28 19:59:15 +0000805 case spv::OpISub:
806 case spv::OpIMul:
807 case spv::OpSDiv:
808 case spv::OpUDiv:
809 case spv::OpFAdd:
810 case spv::OpFSub:
Chris Forbes9d931532019-03-08 09:53:03 -0800811 case spv::OpFMul:
Ben Claytondd1e37e2019-02-28 19:59:15 +0000812 case spv::OpFDiv:
Chris Forbes0e4d6ff2019-03-15 13:43:36 -0700813 case spv::OpFMod:
Chris Forbes1a4c7122019-03-15 14:50:47 -0700814 case spv::OpFRem:
Ben Claytonec1aeb82019-03-04 19:33:27 +0000815 case spv::OpFOrdEqual:
816 case spv::OpFUnordEqual:
817 case spv::OpFOrdNotEqual:
818 case spv::OpFUnordNotEqual:
819 case spv::OpFOrdLessThan:
820 case spv::OpFUnordLessThan:
821 case spv::OpFOrdGreaterThan:
822 case spv::OpFUnordGreaterThan:
823 case spv::OpFOrdLessThanEqual:
824 case spv::OpFUnordLessThanEqual:
825 case spv::OpFOrdGreaterThanEqual:
826 case spv::OpFUnordGreaterThanEqual:
Ben Claytonbb8c8e22019-03-08 12:04:00 +0000827 case spv::OpSMod:
Chris Forbes71673c82019-03-14 12:55:20 -0700828 case spv::OpSRem:
Ben Claytondd1e37e2019-02-28 19:59:15 +0000829 case spv::OpUMod:
Ben Claytone95eeb12019-03-04 16:32:09 +0000830 case spv::OpIEqual:
831 case spv::OpINotEqual:
832 case spv::OpUGreaterThan:
833 case spv::OpSGreaterThan:
834 case spv::OpUGreaterThanEqual:
835 case spv::OpSGreaterThanEqual:
836 case spv::OpULessThan:
837 case spv::OpSLessThan:
838 case spv::OpULessThanEqual:
839 case spv::OpSLessThanEqual:
Ben Claytondd1e37e2019-02-28 19:59:15 +0000840 case spv::OpShiftRightLogical:
841 case spv::OpShiftRightArithmetic:
842 case spv::OpShiftLeftLogical:
843 case spv::OpBitwiseOr:
844 case spv::OpBitwiseXor:
845 case spv::OpBitwiseAnd:
846 case spv::OpLogicalOr:
847 case spv::OpLogicalAnd:
Chris Forbes787b4462019-03-08 12:16:57 -0800848 case spv::OpLogicalEqual:
849 case spv::OpLogicalNotEqual:
Chris Forbese86b6dc2019-03-01 09:08:47 -0800850 case spv::OpUMulExtended:
851 case spv::OpSMulExtended:
Chris Forbes3e6f60b2019-05-08 17:28:10 -0700852 case spv::OpIAddCarry:
853 case spv::OpISubBorrow:
Chris Forbes2b287cc2019-03-01 13:24:17 -0800854 case spv::OpDot:
Chris Forbes4d503052019-03-01 17:13:57 -0800855 case spv::OpConvertFToU:
856 case spv::OpConvertFToS:
857 case spv::OpConvertSToF:
858 case spv::OpConvertUToF:
859 case spv::OpBitcast:
Ben Claytonbf943f62019-03-05 12:57:39 +0000860 case spv::OpSelect:
Chris Forbes9667a5b2019-03-07 09:26:48 -0800861 case spv::OpExtInst:
Chris Forbes3ed33ce2019-03-07 13:38:31 -0800862 case spv::OpIsInf:
863 case spv::OpIsNan:
Chris Forbes0785f692019-03-08 09:09:18 -0800864 case spv::OpAny:
865 case spv::OpAll:
Chris Forbesaff2dd02019-03-20 14:50:24 -0700866 case spv::OpDPdx:
867 case spv::OpDPdxCoarse:
868 case spv::OpDPdy:
869 case spv::OpDPdyCoarse:
870 case spv::OpFwidth:
871 case spv::OpFwidthCoarse:
872 case spv::OpDPdxFine:
873 case spv::OpDPdyFine:
874 case spv::OpFwidthFine:
Nicolas Capens5e8414e2019-03-19 16:22:35 -0400875 case spv::OpAtomicLoad:
Chris Forbes17813932019-04-18 11:45:54 -0700876 case spv::OpAtomicIAdd:
Chris Forbes707ed992019-04-18 18:17:35 -0700877 case spv::OpAtomicISub:
Chris Forbes17813932019-04-18 11:45:54 -0700878 case spv::OpAtomicSMin:
879 case spv::OpAtomicSMax:
880 case spv::OpAtomicUMin:
881 case spv::OpAtomicUMax:
882 case spv::OpAtomicAnd:
883 case spv::OpAtomicOr:
884 case spv::OpAtomicXor:
Chris Forbes707ed992019-04-18 18:17:35 -0700885 case spv::OpAtomicIIncrement:
886 case spv::OpAtomicIDecrement:
Chris Forbes17813932019-04-18 11:45:54 -0700887 case spv::OpAtomicExchange:
Chris Forbesa16238d2019-04-18 16:31:54 -0700888 case spv::OpAtomicCompareExchange:
Ben Clayton9fd02e02019-03-21 18:47:15 +0000889 case spv::OpPhi:
Nicolas Capens7d867272019-04-08 22:51:08 -0400890 case spv::OpImageSampleImplicitLod:
Nicolas Capens125dba02019-04-24 02:03:22 -0400891 case spv::OpImageSampleExplicitLod:
Nicolas Capens5b09dd12019-04-30 01:05:28 -0400892 case spv::OpImageSampleDrefImplicitLod:
893 case spv::OpImageSampleDrefExplicitLod:
894 case spv::OpImageSampleProjImplicitLod:
895 case spv::OpImageSampleProjExplicitLod:
896 case spv::OpImageSampleProjDrefImplicitLod:
897 case spv::OpImageSampleProjDrefExplicitLod:
Chris Forbescd631592019-04-27 10:37:18 -0700898 case spv::OpImageFetch:
Chris Forbesb0d00ea2019-04-17 20:24:20 -0700899 case spv::OpImageQuerySize:
Ben Clayton0264d8e2019-05-08 15:39:40 +0100900 case spv::OpImageQuerySizeLod:
Ben Claytonb4001ed2019-05-10 10:21:00 +0100901 case spv::OpImageQueryLevels:
Ben Clayton2568cf72019-05-10 11:53:14 +0100902 case spv::OpImageQuerySamples:
Chris Forbes2f7f2ec2019-04-17 16:58:15 -0700903 case spv::OpImageRead:
Chris Forbesb51f2c12019-04-18 11:01:30 -0700904 case spv::OpImageTexelPointer:
Ben Clayton32d47972019-04-19 17:08:15 -0400905 case spv::OpGroupNonUniformElect:
Ben Clayton78abf372019-05-09 15:11:58 +0100906 case spv::OpCopyObject:
Ben Claytone4605da2019-05-09 16:24:01 +0100907 case spv::OpArrayLength:
Nicolas Capens7d867272019-04-08 22:51:08 -0400908 // Instructions that yield an intermediate value or divergent pointer
909 DefineResult(insn);
Chris Forbesa71b8e92019-02-10 22:42:42 +0000910 break;
Chris Forbesa71b8e92019-02-10 22:42:42 +0000911
Chris Forbes7edf5342019-02-10 22:41:21 +0000912 case spv::OpStore:
Nicolas Capens5e8414e2019-03-19 16:22:35 -0400913 case spv::OpAtomicStore:
Chris Forbes179f0142019-04-17 20:24:44 -0700914 case spv::OpImageWrite:
Ben Claytonb5a45462019-04-30 19:21:29 +0100915 case spv::OpCopyMemory:
Ben Claytonb16c5862019-05-08 14:01:38 +0100916 case spv::OpMemoryBarrier:
Chris Forbes7edf5342019-02-10 22:41:21 +0000917 // Don't need to do anything during analysis pass
918 break;
919
Ben Claytonecfeede2019-05-08 08:51:01 +0100920 case spv::OpControlBarrier:
921 modes.ContainsControlBarriers = true;
922 break;
923
Chris Forbes9869d602019-04-18 17:26:16 -0700924 case spv::OpExtension:
925 {
Ben Clayton60f15ec2019-05-09 17:50:01 +0100926 auto ext = insn.string(1);
Chris Forbes9869d602019-04-18 17:26:16 -0700927 // Part of core SPIR-V 1.3. Vulkan 1.1 implementations must also accept the pre-1.3
928 // extension per Appendix A, `Vulkan Environment for SPIR-V`.
Ben Clayton92797c22019-04-25 10:44:03 +0100929 if (!strcmp(ext, "SPV_KHR_storage_buffer_storage_class")) break;
Chris Forbescb9bf9a2019-05-10 08:13:34 -0700930 if (!strcmp(ext, "SPV_KHR_shader_draw_parameters")) break;
931 if (!strcmp(ext, "SPV_KHR_16bit_storage")) break;
Chris Forbes39b07502019-05-09 09:47:06 -0700932 if (!strcmp(ext, "SPV_KHR_variable_pointers")) break;
Ben Clayton2cd983d2019-05-10 11:30:09 +0100933 if (!strcmp(ext, "SPV_KHR_device_group")) break;
Ben Clayton92797c22019-04-25 10:44:03 +0100934 UNSUPPORTED("SPIR-V Extension: %s", ext);
Chris Forbes9869d602019-04-18 17:26:16 -0700935 break;
936 }
937
Chris Forbes4a979dc2019-01-17 09:36:46 -0800938 default:
Nicolas Capens125dba02019-04-24 02:03:22 -0400939 UNIMPLEMENTED("%s", OpcodeName(opcode).c_str());
Chris Forbesaf4ed532018-12-06 18:33:27 -0800940 }
941 }
Ben Clayton64f78f52019-03-21 17:21:06 +0000942
Ben Clayton60f15ec2019-05-09 17:50:01 +0100943 ASSERT_MSG(entryPointFunctionId != 0, "Entry point '%s' not found", createInfo->pName);
Ben Claytonfe3f0132019-03-26 11:10:16 +0000944 AssignBlockIns();
945 }
946
Ben Clayton513ed1d2019-03-28 16:07:00 +0000947 void SpirvShader::TraverseReachableBlocks(Block::ID id, SpirvShader::Block::Set& reachable)
Ben Claytonfe3f0132019-03-26 11:10:16 +0000948 {
Ben Clayton513ed1d2019-03-28 16:07:00 +0000949 if (reachable.count(id) == 0)
Ben Claytonfe3f0132019-03-26 11:10:16 +0000950 {
Ben Clayton513ed1d2019-03-28 16:07:00 +0000951 reachable.emplace(id);
952 for (auto out : getBlock(id).outs)
Ben Claytonfe3f0132019-03-26 11:10:16 +0000953 {
Ben Clayton513ed1d2019-03-28 16:07:00 +0000954 TraverseReachableBlocks(out, reachable);
Ben Claytonfe3f0132019-03-26 11:10:16 +0000955 }
956 }
957 }
958
959 void SpirvShader::AssignBlockIns()
960 {
Ben Clayton513ed1d2019-03-28 16:07:00 +0000961 Block::Set reachable;
Ben Clayton60f15ec2019-05-09 17:50:01 +0100962 TraverseReachableBlocks(entryPointBlockId, reachable);
Ben Clayton513ed1d2019-03-28 16:07:00 +0000963
Ben Clayton64f78f52019-03-21 17:21:06 +0000964 for (auto &it : blocks)
965 {
966 auto &blockId = it.first;
Ben Clayton513ed1d2019-03-28 16:07:00 +0000967 if (reachable.count(blockId) > 0)
Ben Clayton64f78f52019-03-21 17:21:06 +0000968 {
Ben Clayton513ed1d2019-03-28 16:07:00 +0000969 for (auto &outId : it.second.outs)
970 {
971 auto outIt = blocks.find(outId);
972 ASSERT_MSG(outIt != blocks.end(), "Block %d has a non-existent out %d", blockId.value(), outId.value());
973 auto &out = outIt->second;
974 out.ins.emplace(blockId);
975 }
Ben Clayton64f78f52019-03-21 17:21:06 +0000976 }
977 }
Chris Forbesaf4ed532018-12-06 18:33:27 -0800978 }
979
Ben Clayton0bb83b82019-02-26 11:41:07 +0000980 void SpirvShader::DeclareType(InsnIterator insn)
981 {
Ben Claytonaf973b62019-03-13 18:19:20 +0000982 Type::ID resultId = insn.word(1);
Ben Clayton0bb83b82019-02-26 11:41:07 +0000983
984 auto &type = types[resultId];
985 type.definition = insn;
986 type.sizeInComponents = ComputeTypeSize(insn);
987
988 // A structure is a builtin block if it has a builtin
989 // member. All members of such a structure are builtins.
990 switch (insn.opcode())
991 {
992 case spv::OpTypeStruct:
993 {
994 auto d = memberDecorations.find(resultId);
995 if (d != memberDecorations.end())
996 {
997 for (auto &m : d->second)
998 {
999 if (m.HasBuiltIn)
1000 {
1001 type.isBuiltInBlock = true;
1002 break;
1003 }
1004 }
1005 }
1006 break;
1007 }
1008 case spv::OpTypePointer:
1009 {
Ben Claytonaf973b62019-03-13 18:19:20 +00001010 Type::ID elementTypeId = insn.word(3);
Ben Clayton0bb83b82019-02-26 11:41:07 +00001011 type.element = elementTypeId;
1012 type.isBuiltInBlock = getType(elementTypeId).isBuiltInBlock;
1013 type.storageClass = static_cast<spv::StorageClass>(insn.word(2));
1014 break;
1015 }
1016 case spv::OpTypeVector:
1017 case spv::OpTypeMatrix:
1018 case spv::OpTypeArray:
1019 case spv::OpTypeRuntimeArray:
1020 {
Ben Claytonaf973b62019-03-13 18:19:20 +00001021 Type::ID elementTypeId = insn.word(2);
Ben Clayton0bb83b82019-02-26 11:41:07 +00001022 type.element = elementTypeId;
1023 break;
1024 }
1025 default:
1026 break;
1027 }
1028 }
1029
Chris Forbes1ca8acd2019-02-20 13:00:54 -08001030 SpirvShader::Object& SpirvShader::CreateConstant(InsnIterator insn)
1031 {
Ben Claytonaf973b62019-03-13 18:19:20 +00001032 Type::ID typeId = insn.word(1);
1033 Object::ID resultId = insn.word(2);
Chris Forbes1ca8acd2019-02-20 13:00:54 -08001034 auto &object = defs[resultId];
Ben Clayton9a162482019-02-25 11:54:43 +00001035 auto &objectTy = getType(typeId);
1036 object.type = typeId;
Chris Forbes1ca8acd2019-02-20 13:00:54 -08001037 object.kind = Object::Kind::Constant;
1038 object.definition = insn;
Ben Clayton9a162482019-02-25 11:54:43 +00001039 object.constantValue = std::unique_ptr<uint32_t[]>(new uint32_t[objectTy.sizeInComponents]);
Chris Forbes1ca8acd2019-02-20 13:00:54 -08001040 return object;
1041 }
1042
Chris Forbes049ff382019-02-02 15:16:43 -08001043 void SpirvShader::ProcessInterfaceVariable(Object &object)
Chris Forbesbde34082018-12-28 12:03:10 -08001044 {
Ben Clayton9a162482019-02-25 11:54:43 +00001045 auto &objectTy = getType(object.type);
Ben Clayton6fae32c2019-02-28 20:06:42 +00001046 ASSERT(objectTy.storageClass == spv::StorageClassInput || objectTy.storageClass == spv::StorageClassOutput);
Chris Forbesbde34082018-12-28 12:03:10 -08001047
Nicolas Capens29090852019-03-19 16:22:35 -04001048 ASSERT(objectTy.opcode() == spv::OpTypePointer);
Ben Clayton9a162482019-02-25 11:54:43 +00001049 auto pointeeTy = getType(objectTy.element);
Chris Forbesbde34082018-12-28 12:03:10 -08001050
Ben Clayton9a162482019-02-25 11:54:43 +00001051 auto &builtinInterface = (objectTy.storageClass == spv::StorageClassInput) ? inputBuiltins : outputBuiltins;
1052 auto &userDefinedInterface = (objectTy.storageClass == spv::StorageClassInput) ? inputs : outputs;
1053
Nicolas Capens29090852019-03-19 16:22:35 -04001054 ASSERT(object.opcode() == spv::OpVariable);
Ben Claytonaf973b62019-03-13 18:19:20 +00001055 Object::ID resultId = object.definition.word(2);
Ben Clayton9a162482019-02-25 11:54:43 +00001056
1057 if (objectTy.isBuiltInBlock)
Chris Forbesbde34082018-12-28 12:03:10 -08001058 {
1059 // walk the builtin block, registering each of its members separately.
Ben Clayton9a162482019-02-25 11:54:43 +00001060 auto m = memberDecorations.find(objectTy.element);
Ben Clayton6fae32c2019-02-28 20:06:42 +00001061 ASSERT(m != memberDecorations.end()); // otherwise we wouldn't have marked the type chain
Ben Clayton9a162482019-02-25 11:54:43 +00001062 auto &structType = pointeeTy.definition;
Chris Forbesbde34082018-12-28 12:03:10 -08001063 auto offset = 0u;
1064 auto word = 2u;
1065 for (auto &member : m->second)
1066 {
Chris Forbes840809a2019-01-14 14:30:20 -08001067 auto &memberType = getType(structType.word(word));
Chris Forbesbde34082018-12-28 12:03:10 -08001068
1069 if (member.HasBuiltIn)
1070 {
1071 builtinInterface[member.BuiltIn] = {resultId, offset, memberType.sizeInComponents};
1072 }
1073
1074 offset += memberType.sizeInComponents;
1075 ++word;
1076 }
1077 return;
1078 }
1079
1080 auto d = decorations.find(resultId);
1081 if (d != decorations.end() && d->second.HasBuiltIn)
1082 {
Ben Clayton9a162482019-02-25 11:54:43 +00001083 builtinInterface[d->second.BuiltIn] = {resultId, 0, pointeeTy.sizeInComponents};
Chris Forbesbde34082018-12-28 12:03:10 -08001084 }
1085 else
1086 {
Chris Forbes049ff382019-02-02 15:16:43 -08001087 object.kind = Object::Kind::InterfaceVariable;
Chris Forbesb8fb08a2019-02-13 11:45:27 -08001088 VisitInterface(resultId,
1089 [&userDefinedInterface](Decorations const &d, AttribType type) {
1090 // Populate a single scalar slot in the interface from a collection of decorations and the intended component type.
1091 auto scalarSlot = (d.Location << 2) | d.Component;
Ben Clayton6fae32c2019-02-28 20:06:42 +00001092 ASSERT(scalarSlot >= 0 &&
Chris Forbesb8fb08a2019-02-13 11:45:27 -08001093 scalarSlot < static_cast<int32_t>(userDefinedInterface.size()));
1094
1095 auto &slot = userDefinedInterface[scalarSlot];
1096 slot.Type = type;
1097 slot.Flat = d.Flat;
1098 slot.NoPerspective = d.NoPerspective;
1099 slot.Centroid = d.Centroid;
1100 });
Chris Forbesbde34082018-12-28 12:03:10 -08001101 }
1102 }
1103
Chris Forbesaf4ed532018-12-06 18:33:27 -08001104 void SpirvShader::ProcessExecutionMode(InsnIterator insn)
1105 {
1106 auto mode = static_cast<spv::ExecutionMode>(insn.word(2));
Chris Forbes4a979dc2019-01-17 09:36:46 -08001107 switch (mode)
1108 {
1109 case spv::ExecutionModeEarlyFragmentTests:
1110 modes.EarlyFragmentTests = true;
1111 break;
1112 case spv::ExecutionModeDepthReplacing:
1113 modes.DepthReplacing = true;
1114 break;
1115 case spv::ExecutionModeDepthGreater:
1116 modes.DepthGreater = true;
1117 break;
1118 case spv::ExecutionModeDepthLess:
1119 modes.DepthLess = true;
1120 break;
1121 case spv::ExecutionModeDepthUnchanged:
1122 modes.DepthUnchanged = true;
1123 break;
1124 case spv::ExecutionModeLocalSize:
Ben Clayton62758f52019-03-13 14:18:58 +00001125 modes.WorkgroupSizeX = insn.word(3);
1126 modes.WorkgroupSizeY = insn.word(4);
1127 modes.WorkgroupSizeZ = insn.word(5);
Chris Forbes4a979dc2019-01-17 09:36:46 -08001128 break;
1129 case spv::ExecutionModeOriginUpperLeft:
1130 // This is always the case for a Vulkan shader. Do nothing.
1131 break;
1132 default:
Ben Clayton92797c22019-04-25 10:44:03 +01001133 UNREACHABLE("Execution mode: %d", int(mode));
Chris Forbesaf4ed532018-12-06 18:33:27 -08001134 }
1135 }
Chris Forbes739a7fb2018-12-08 13:09:40 -08001136
Ben Clayton9b156612019-03-13 19:48:31 +00001137 uint32_t SpirvShader::ComputeTypeSize(InsnIterator insn)
Chris Forbes739a7fb2018-12-08 13:09:40 -08001138 {
1139 // Types are always built from the bottom up (with the exception of forward ptrs, which
1140 // don't appear in Vulkan shaders. Therefore, we can always assume our component parts have
1141 // already been described (and so their sizes determined)
1142 switch (insn.opcode())
1143 {
1144 case spv::OpTypeVoid:
1145 case spv::OpTypeSampler:
1146 case spv::OpTypeImage:
1147 case spv::OpTypeSampledImage:
1148 case spv::OpTypeFunction:
1149 case spv::OpTypeRuntimeArray:
1150 // Objects that don't consume any space.
1151 // Descriptor-backed objects currently only need exist at compile-time.
1152 // Runtime arrays don't appear in places where their size would be interesting
1153 return 0;
1154
1155 case spv::OpTypeBool:
1156 case spv::OpTypeFloat:
1157 case spv::OpTypeInt:
1158 // All the fundamental types are 1 component. If we ever add support for 8/16/64-bit components,
1159 // we might need to change this, but only 32 bit components are required for Vulkan 1.1.
1160 return 1;
1161
1162 case spv::OpTypeVector:
1163 case spv::OpTypeMatrix:
1164 // Vectors and matrices both consume element count * element size.
Chris Forbes840809a2019-01-14 14:30:20 -08001165 return getType(insn.word(2)).sizeInComponents * insn.word(3);
Chris Forbes739a7fb2018-12-08 13:09:40 -08001166
1167 case spv::OpTypeArray:
Chris Forbes5be4d702018-12-27 16:12:31 -08001168 {
1169 // Element count * element size. Array sizes come from constant ids.
Chris Forbesea81ab72019-05-14 15:20:33 -07001170 auto arraySize = GetConstScalarInt(insn.word(3));
Chris Forbes840809a2019-01-14 14:30:20 -08001171 return getType(insn.word(2)).sizeInComponents * arraySize;
Chris Forbes5be4d702018-12-27 16:12:31 -08001172 }
Chris Forbes739a7fb2018-12-08 13:09:40 -08001173
1174 case spv::OpTypeStruct:
1175 {
1176 uint32_t size = 0;
1177 for (uint32_t i = 2u; i < insn.wordCount(); i++)
1178 {
Chris Forbes840809a2019-01-14 14:30:20 -08001179 size += getType(insn.word(i)).sizeInComponents;
Chris Forbes739a7fb2018-12-08 13:09:40 -08001180 }
1181 return size;
1182 }
1183
1184 case spv::OpTypePointer:
Chris Forbes0f59a2c2019-02-10 23:03:12 +00001185 // Runtime representation of a pointer is a per-lane index.
1186 // Note: clients are expected to look through the pointer if they want the pointee size instead.
1187 return 1;
Chris Forbes739a7fb2018-12-08 13:09:40 -08001188
1189 default:
Ben Clayton92797c22019-04-25 10:44:03 +01001190 UNREACHABLE("%s", OpcodeName(insn.opcode()).c_str());
Ben Clayton60a3d6f2019-02-26 17:24:46 +00001191 return 0;
Chris Forbes739a7fb2018-12-08 13:09:40 -08001192 }
1193 }
Chris Forbesc25b8072018-12-10 15:10:39 -08001194
Ben Clayton831db962019-02-27 14:57:18 +00001195 bool SpirvShader::IsStorageInterleavedByLane(spv::StorageClass storageClass)
1196 {
1197 switch (storageClass)
1198 {
1199 case spv::StorageClassUniform:
1200 case spv::StorageClassStorageBuffer:
Chris Forbesa30de542019-03-18 18:51:55 -07001201 case spv::StorageClassPushConstant:
Ben Claytonecd38482019-04-19 17:11:08 -04001202 case spv::StorageClassWorkgroup:
Ben Clayton831db962019-02-27 14:57:18 +00001203 return false;
1204 default:
1205 return true;
1206 }
1207 }
1208
Chris Forbesb8fb08a2019-02-13 11:45:27 -08001209 template<typename F>
Ben Claytonaf973b62019-03-13 18:19:20 +00001210 int SpirvShader::VisitInterfaceInner(Type::ID id, Decorations d, F f) const
Chris Forbes5839dcf2018-12-10 19:02:58 -08001211 {
1212 // Recursively walks variable definition and its type tree, taking into account
1213 // any explicit Location or Component decorations encountered; where explicit
1214 // Locations or Components are not specified, assigns them sequentially.
1215 // Collected decorations are carried down toward the leaves and across
1216 // siblings; Effect of decorations intentionally does not flow back up the tree.
1217 //
Chris Forbesb8fb08a2019-02-13 11:45:27 -08001218 // F is a functor to be called with the effective decoration set for every component.
1219 //
1220 // Returns the next available location, and calls f().
Chris Forbes5839dcf2018-12-10 19:02:58 -08001221
1222 // This covers the rules in Vulkan 1.1 spec, 14.1.4 Location Assignment.
1223
Chris Forbes49d664d2019-02-12 19:24:50 +00001224 ApplyDecorationsForId(&d, id);
Chris Forbes5839dcf2018-12-10 19:02:58 -08001225
Chris Forbes840809a2019-01-14 14:30:20 -08001226 auto const &obj = getType(id);
Nicolas Capens29090852019-03-19 16:22:35 -04001227 switch(obj.opcode())
Chris Forbes5839dcf2018-12-10 19:02:58 -08001228 {
Chris Forbes5839dcf2018-12-10 19:02:58 -08001229 case spv::OpTypePointer:
Chris Forbesb8fb08a2019-02-13 11:45:27 -08001230 return VisitInterfaceInner<F>(obj.definition.word(3), d, f);
Chris Forbes5839dcf2018-12-10 19:02:58 -08001231 case spv::OpTypeMatrix:
1232 for (auto i = 0u; i < obj.definition.word(3); i++, d.Location++)
1233 {
1234 // consumes same components of N consecutive locations
Chris Forbesb8fb08a2019-02-13 11:45:27 -08001235 VisitInterfaceInner<F>(obj.definition.word(2), d, f);
Chris Forbes5839dcf2018-12-10 19:02:58 -08001236 }
1237 return d.Location;
1238 case spv::OpTypeVector:
1239 for (auto i = 0u; i < obj.definition.word(3); i++, d.Component++)
1240 {
1241 // consumes N consecutive components in the same location
Chris Forbesb8fb08a2019-02-13 11:45:27 -08001242 VisitInterfaceInner<F>(obj.definition.word(2), d, f);
Chris Forbes5839dcf2018-12-10 19:02:58 -08001243 }
1244 return d.Location + 1;
1245 case spv::OpTypeFloat:
Chris Forbesb8fb08a2019-02-13 11:45:27 -08001246 f(d, ATTRIBTYPE_FLOAT);
Chris Forbes5839dcf2018-12-10 19:02:58 -08001247 return d.Location + 1;
1248 case spv::OpTypeInt:
Chris Forbesb8fb08a2019-02-13 11:45:27 -08001249 f(d, obj.definition.word(3) ? ATTRIBTYPE_INT : ATTRIBTYPE_UINT);
Chris Forbes5839dcf2018-12-10 19:02:58 -08001250 return d.Location + 1;
1251 case spv::OpTypeBool:
Chris Forbesb8fb08a2019-02-13 11:45:27 -08001252 f(d, ATTRIBTYPE_UINT);
Chris Forbes5839dcf2018-12-10 19:02:58 -08001253 return d.Location + 1;
1254 case spv::OpTypeStruct:
1255 {
Chris Forbes5839dcf2018-12-10 19:02:58 -08001256 // iterate over members, which may themselves have Location/Component decorations
1257 for (auto i = 0u; i < obj.definition.wordCount() - 2; i++)
1258 {
Chris Forbes49d664d2019-02-12 19:24:50 +00001259 ApplyDecorationsForIdMember(&d, id, i);
Chris Forbesb8fb08a2019-02-13 11:45:27 -08001260 d.Location = VisitInterfaceInner<F>(obj.definition.word(i + 2), d, f);
Chris Forbes5839dcf2018-12-10 19:02:58 -08001261 d.Component = 0; // Implicit locations always have component=0
1262 }
1263 return d.Location;
1264 }
Chris Forbes5be4d702018-12-27 16:12:31 -08001265 case spv::OpTypeArray:
1266 {
Chris Forbesea81ab72019-05-14 15:20:33 -07001267 auto arraySize = GetConstScalarInt(obj.definition.word(3));
Chris Forbes5be4d702018-12-27 16:12:31 -08001268 for (auto i = 0u; i < arraySize; i++)
1269 {
Chris Forbesb8fb08a2019-02-13 11:45:27 -08001270 d.Location = VisitInterfaceInner<F>(obj.definition.word(2), d, f);
Chris Forbes5be4d702018-12-27 16:12:31 -08001271 }
1272 return d.Location;
1273 }
Chris Forbes5839dcf2018-12-10 19:02:58 -08001274 default:
1275 // Intentionally partial; most opcodes do not participate in type hierarchies
1276 return 0;
1277 }
1278 }
1279
Chris Forbesb8fb08a2019-02-13 11:45:27 -08001280 template<typename F>
Ben Claytonaf973b62019-03-13 18:19:20 +00001281 void SpirvShader::VisitInterface(Object::ID id, F f) const
Chris Forbes5839dcf2018-12-10 19:02:58 -08001282 {
Chris Forbesb8fb08a2019-02-13 11:45:27 -08001283 // Walk a variable definition and call f for each component in it.
Chris Forbes5839dcf2018-12-10 19:02:58 -08001284 Decorations d{};
Chris Forbes49d664d2019-02-12 19:24:50 +00001285 ApplyDecorationsForId(&d, id);
Chris Forbes1c658232019-02-01 17:12:25 -08001286
1287 auto def = getObject(id).definition;
Ben Clayton6fae32c2019-02-28 20:06:42 +00001288 ASSERT(def.opcode() == spv::OpVariable);
Chris Forbesb8fb08a2019-02-13 11:45:27 -08001289 VisitInterfaceInner<F>(def.word(1), d, f);
Chris Forbes5839dcf2018-12-10 19:02:58 -08001290 }
1291
Chris Forbese6419ad2019-04-11 12:23:10 -07001292 template<typename F>
1293 void SpirvShader::VisitMemoryObjectInner(sw::SpirvShader::Type::ID id, sw::SpirvShader::Decorations d, uint32_t& index, uint32_t offset, F f) const
1294 {
1295 // Walk a type tree in an explicitly laid out storage class, calling
1296 // a functor for each scalar element within the object.
1297
1298 // The functor's first parameter is the index of the scalar element;
Ben Clayton97035bd2019-04-16 11:35:38 -04001299 // the second parameter is the offset (in bytes) from the base of the
1300 // object.
Chris Forbese6419ad2019-04-11 12:23:10 -07001301
1302 ApplyDecorationsForId(&d, id);
1303 auto const &type = getType(id);
1304
1305 if (d.HasOffset)
1306 {
Ben Clayton97035bd2019-04-16 11:35:38 -04001307 offset += d.Offset;
Chris Forbese6419ad2019-04-11 12:23:10 -07001308 d.HasOffset = false;
1309 }
1310
1311 switch (type.opcode())
1312 {
1313 case spv::OpTypePointer:
1314 VisitMemoryObjectInner<F>(type.definition.word(3), d, index, offset, f);
1315 break;
1316 case spv::OpTypeInt:
1317 case spv::OpTypeFloat:
1318 f(index++, offset);
1319 break;
1320 case spv::OpTypeVector:
Chris Forbes98e6b962019-04-12 11:58:58 -07001321 {
Ben Clayton97035bd2019-04-16 11:35:38 -04001322 auto elemStride = (d.InsideMatrix && d.HasRowMajor && d.RowMajor) ? d.MatrixStride : sizeof(float);
Chris Forbese6419ad2019-04-11 12:23:10 -07001323 for (auto i = 0u; i < type.definition.word(3); i++)
1324 {
Chris Forbes98e6b962019-04-12 11:58:58 -07001325 VisitMemoryObjectInner(type.definition.word(2), d, index, offset + elemStride * i, f);
Chris Forbese6419ad2019-04-11 12:23:10 -07001326 }
1327 break;
Chris Forbes98e6b962019-04-12 11:58:58 -07001328 }
Chris Forbese6419ad2019-04-11 12:23:10 -07001329 case spv::OpTypeMatrix:
Chris Forbes98e6b962019-04-12 11:58:58 -07001330 {
Ben Clayton97035bd2019-04-16 11:35:38 -04001331 auto columnStride = (d.HasRowMajor && d.RowMajor) ? sizeof(float) : d.MatrixStride;
Chris Forbes98e6b962019-04-12 11:58:58 -07001332 d.InsideMatrix = true;
Chris Forbese6419ad2019-04-11 12:23:10 -07001333 for (auto i = 0u; i < type.definition.word(3); i++)
1334 {
1335 ASSERT(d.HasMatrixStride);
Chris Forbes98e6b962019-04-12 11:58:58 -07001336 VisitMemoryObjectInner(type.definition.word(2), d, index, offset + columnStride * i, f);
Chris Forbese6419ad2019-04-11 12:23:10 -07001337 }
1338 break;
Chris Forbes98e6b962019-04-12 11:58:58 -07001339 }
Chris Forbese6419ad2019-04-11 12:23:10 -07001340 case spv::OpTypeStruct:
1341 for (auto i = 0u; i < type.definition.wordCount() - 2; i++)
1342 {
1343 ApplyDecorationsForIdMember(&d, id, i);
1344 VisitMemoryObjectInner<F>(type.definition.word(i + 2), d, index, offset, f);
1345 }
1346 break;
1347 case spv::OpTypeArray:
1348 {
Chris Forbesea81ab72019-05-14 15:20:33 -07001349 auto arraySize = GetConstScalarInt(type.definition.word(3));
Chris Forbese6419ad2019-04-11 12:23:10 -07001350 for (auto i = 0u; i < arraySize; i++)
1351 {
1352 ASSERT(d.HasArrayStride);
Ben Clayton97035bd2019-04-16 11:35:38 -04001353 VisitMemoryObjectInner<F>(type.definition.word(2), d, index, offset + i * d.ArrayStride, f);
Chris Forbese6419ad2019-04-11 12:23:10 -07001354 }
1355 break;
1356 }
1357 default:
Ben Clayton92797c22019-04-25 10:44:03 +01001358 UNREACHABLE("%s", OpcodeName(type.opcode()).c_str());
Chris Forbese6419ad2019-04-11 12:23:10 -07001359 }
1360 }
1361
1362 template<typename F>
1363 void SpirvShader::VisitMemoryObject(sw::SpirvShader::Object::ID id, F f) const
1364 {
1365 auto typeId = getObject(id).type;
1366 auto const & type = getType(typeId);
1367 if (!IsStorageInterleavedByLane(type.storageClass)) // TODO: really "is explicit layout"
1368 {
1369 Decorations d{};
1370 ApplyDecorationsForId(&d, id);
1371 uint32_t index = 0;
1372 VisitMemoryObjectInner<F>(typeId, d, index, 0, f);
1373 }
1374 else
1375 {
1376 // Objects without explicit layout are tightly packed.
1377 for (auto i = 0u; i < getType(type.element).sizeInComponents; i++)
1378 {
Ben Clayton97035bd2019-04-16 11:35:38 -04001379 f(i, i * sizeof(float));
Chris Forbese6419ad2019-04-11 12:23:10 -07001380 }
1381 }
1382 }
1383
Ben Clayton3d497382019-04-08 16:16:12 -04001384 SIMD::Pointer SpirvShader::GetPointerToData(Object::ID id, int arrayIndex, SpirvRoutine *routine) const
Ben Clayton484e08e2019-04-05 12:11:39 +01001385 {
1386 auto &object = getObject(id);
1387 switch (object.kind)
1388 {
Ben Clayton1d514f32019-04-19 16:11:18 -04001389 case Object::Kind::Pointer:
Ben Clayton484e08e2019-04-05 12:11:39 +01001390 case Object::Kind::InterfaceVariable:
Ben Clayton5f7e9112019-04-16 11:03:40 -04001391 return routine->getPointer(id);
Ben Clayton484e08e2019-04-05 12:11:39 +01001392
Ben Clayton6b511342019-04-05 12:12:30 +01001393 case Object::Kind::DescriptorSet:
1394 {
Nicolas Capens82eb22e2019-04-10 01:15:43 -04001395 const auto &d = descriptorDecorations.at(id);
1396 ASSERT(d.DescriptorSet >= 0 && d.DescriptorSet < vk::MAX_BOUND_DESCRIPTOR_SETS);
Ben Clayton6b511342019-04-05 12:12:30 +01001397 ASSERT(d.Binding >= 0);
1398
1399 auto set = routine->getPointer(id);
Ben Clayton5f7e9112019-04-16 11:03:40 -04001400
Ben Clayton6b511342019-04-05 12:12:30 +01001401 auto setLayout = routine->pipelineLayout->getDescriptorSetLayout(d.DescriptorSet);
Ben Clayton8c56e8d2019-04-25 08:24:01 +01001402 ASSERT_MSG(setLayout->hasBinding(d.Binding), "Descriptor set %d does not contain binding %d", int(d.DescriptorSet), int(d.Binding));
Alexis Hetu5078d482019-04-10 15:00:25 -04001403 int bindingOffset = static_cast<int>(setLayout->getBindingOffset(d.Binding, arrayIndex));
Ben Clayton6b511342019-04-05 12:12:30 +01001404
Chris Forbesbfbdd892019-04-27 12:11:29 -07001405 Pointer<Byte> descriptor = set.base + bindingOffset; // BufferDescriptor*
1406 Pointer<Byte> data = *Pointer<Pointer<Byte>>(descriptor + OFFSET(vk::BufferDescriptor, ptr)); // void*
1407 Int size = *Pointer<Int>(descriptor + OFFSET(vk::BufferDescriptor, sizeInBytes));
Ben Clayton6b511342019-04-05 12:12:30 +01001408 if (setLayout->isBindingDynamic(d.Binding))
1409 {
1410 uint32_t dynamicBindingIndex =
1411 routine->pipelineLayout->getDynamicOffsetBase(d.DescriptorSet) +
1412 setLayout->getDynamicDescriptorOffset(d.Binding) +
1413 arrayIndex;
Chris Forbesbfbdd892019-04-27 12:11:29 -07001414 Int offset = routine->descriptorDynamicOffsets[dynamicBindingIndex];
1415 Int robustnessSize = *Pointer<Int>(descriptor + OFFSET(vk::BufferDescriptor, robustnessSize));
1416 return SIMD::Pointer(data + offset, Min(size, robustnessSize - offset));
Ben Clayton6b511342019-04-05 12:12:30 +01001417 }
Chris Forbesbfbdd892019-04-27 12:11:29 -07001418 else
1419 {
1420 return SIMD::Pointer(data, size);
1421 }
Ben Clayton6b511342019-04-05 12:12:30 +01001422 }
1423
Ben Clayton484e08e2019-04-05 12:11:39 +01001424 default:
1425 UNREACHABLE("Invalid pointer kind %d", int(object.kind));
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04001426 return SIMD::Pointer(Pointer<Byte>(), 0);
Ben Clayton484e08e2019-04-05 12:11:39 +01001427 }
1428 }
1429
Chris Forbes3610ded2019-04-22 18:12:13 -07001430 void SpirvShader::ApplyDecorationsForAccessChain(Decorations *d, DescriptorDecorations *dd, Object::ID baseId, uint32_t numIndexes, uint32_t const *indexIds) const
Chris Forbese6419ad2019-04-11 12:23:10 -07001431 {
1432 ApplyDecorationsForId(d, baseId);
1433 auto &baseObject = getObject(baseId);
1434 ApplyDecorationsForId(d, baseObject.type);
1435 auto typeId = getType(baseObject.type).element;
1436
1437 for (auto i = 0u; i < numIndexes; i++)
1438 {
1439 ApplyDecorationsForId(d, typeId);
1440 auto & type = getType(typeId);
1441 switch (type.opcode())
1442 {
1443 case spv::OpTypeStruct:
1444 {
Chris Forbesea81ab72019-05-14 15:20:33 -07001445 int memberIndex = GetConstScalarInt(indexIds[i]);
Chris Forbese6419ad2019-04-11 12:23:10 -07001446 ApplyDecorationsForIdMember(d, typeId, memberIndex);
1447 typeId = type.definition.word(2u + memberIndex);
1448 break;
1449 }
1450 case spv::OpTypeArray:
1451 case spv::OpTypeRuntimeArray:
Chris Forbes3610ded2019-04-22 18:12:13 -07001452 if (dd->InputAttachmentIndex >= 0)
1453 {
Chris Forbesea81ab72019-05-14 15:20:33 -07001454 dd->InputAttachmentIndex += GetConstScalarInt(indexIds[i]);
Chris Forbes3610ded2019-04-22 18:12:13 -07001455 }
1456 typeId = type.element;
1457 break;
Chris Forbese6419ad2019-04-11 12:23:10 -07001458 case spv::OpTypeVector:
1459 typeId = type.element;
1460 break;
Chris Forbes98e6b962019-04-12 11:58:58 -07001461 case spv::OpTypeMatrix:
1462 typeId = type.element;
1463 d->InsideMatrix = true;
1464 break;
Chris Forbese6419ad2019-04-11 12:23:10 -07001465 default:
Ben Clayton92797c22019-04-25 10:44:03 +01001466 UNREACHABLE("%s", OpcodeName(type.definition.opcode()).c_str());
Chris Forbese6419ad2019-04-11 12:23:10 -07001467 }
1468 }
1469 }
1470
Nicolas Capens82eb22e2019-04-10 01:15:43 -04001471 SIMD::Pointer SpirvShader::WalkExplicitLayoutAccessChain(Object::ID baseId, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const
Chris Forbesa30de542019-03-18 18:51:55 -07001472 {
1473 // Produce a offset into external memory in sizeof(float) units
1474
Nicolas Capens82eb22e2019-04-10 01:15:43 -04001475 auto &baseObject = getObject(baseId);
Chris Forbesa30de542019-03-18 18:51:55 -07001476 Type::ID typeId = getType(baseObject.type).element;
Ben Clayton484e08e2019-04-05 12:11:39 +01001477 Decorations d = {};
Chris Forbesfe1dd4b2019-03-19 09:06:19 -07001478 ApplyDecorationsForId(&d, baseObject.type);
Chris Forbesa30de542019-03-18 18:51:55 -07001479
Alexis Hetu5078d482019-04-10 15:00:25 -04001480 uint32_t arrayIndex = 0;
Ben Clayton6b511342019-04-05 12:12:30 +01001481 if (baseObject.kind == Object::Kind::DescriptorSet)
1482 {
1483 auto type = getType(typeId).definition.opcode();
1484 if (type == spv::OpTypeArray || type == spv::OpTypeRuntimeArray)
1485 {
1486 ASSERT(getObject(indexIds[0]).kind == Object::Kind::Constant);
Chris Forbesea81ab72019-05-14 15:20:33 -07001487 arrayIndex = GetConstScalarInt(indexIds[0]);
Ben Clayton6b511342019-04-05 12:12:30 +01001488
1489 numIndexes--;
1490 indexIds++;
1491 typeId = getType(typeId).element;
1492 }
1493 }
1494
Nicolas Capens82eb22e2019-04-10 01:15:43 -04001495 auto ptr = GetPointerToData(baseId, arrayIndex, routine);
Ben Clayton484e08e2019-04-05 12:11:39 +01001496
1497 int constantOffset = 0;
Chris Forbesa30de542019-03-18 18:51:55 -07001498
1499 for (auto i = 0u; i < numIndexes; i++)
1500 {
1501 auto & type = getType(typeId);
Chris Forbese6419ad2019-04-11 12:23:10 -07001502 ApplyDecorationsForId(&d, typeId);
1503
Chris Forbesa30de542019-03-18 18:51:55 -07001504 switch (type.definition.opcode())
1505 {
1506 case spv::OpTypeStruct:
1507 {
Chris Forbesea81ab72019-05-14 15:20:33 -07001508 int memberIndex = GetConstScalarInt(indexIds[i]);
Chris Forbesa30de542019-03-18 18:51:55 -07001509 ApplyDecorationsForIdMember(&d, typeId, memberIndex);
1510 ASSERT(d.HasOffset);
Ben Clayton97035bd2019-04-16 11:35:38 -04001511 constantOffset += d.Offset;
Chris Forbesa30de542019-03-18 18:51:55 -07001512 typeId = type.definition.word(2u + memberIndex);
1513 break;
1514 }
1515 case spv::OpTypeArray:
1516 case spv::OpTypeRuntimeArray:
1517 {
1518 // TODO: b/127950082: Check bounds.
Chris Forbesa30de542019-03-18 18:51:55 -07001519 ASSERT(d.HasArrayStride);
1520 auto & obj = getObject(indexIds[i]);
1521 if (obj.kind == Object::Kind::Constant)
Ben Clayton5f7e9112019-04-16 11:03:40 -04001522 {
Chris Forbesea81ab72019-05-14 15:20:33 -07001523 constantOffset += d.ArrayStride * GetConstScalarInt(indexIds[i]);
Ben Clayton5f7e9112019-04-16 11:03:40 -04001524 }
Chris Forbesa30de542019-03-18 18:51:55 -07001525 else
Ben Clayton5f7e9112019-04-16 11:03:40 -04001526 {
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04001527 ptr += SIMD::Int(d.ArrayStride) * routine->getIntermediate(indexIds[i]).Int(0);
Ben Clayton5f7e9112019-04-16 11:03:40 -04001528 }
Chris Forbesa30de542019-03-18 18:51:55 -07001529 typeId = type.element;
1530 break;
1531 }
1532 case spv::OpTypeMatrix:
1533 {
1534 // TODO: b/127950082: Check bounds.
Chris Forbesa30de542019-03-18 18:51:55 -07001535 ASSERT(d.HasMatrixStride);
Chris Forbes98e6b962019-04-12 11:58:58 -07001536 d.InsideMatrix = true;
Ben Clayton97035bd2019-04-16 11:35:38 -04001537 auto columnStride = (d.HasRowMajor && d.RowMajor) ? sizeof(float) : d.MatrixStride;
Chris Forbesa30de542019-03-18 18:51:55 -07001538 auto & obj = getObject(indexIds[i]);
1539 if (obj.kind == Object::Kind::Constant)
Ben Clayton5f7e9112019-04-16 11:03:40 -04001540 {
Chris Forbesea81ab72019-05-14 15:20:33 -07001541 constantOffset += columnStride * GetConstScalarInt(indexIds[i]);
Ben Clayton5f7e9112019-04-16 11:03:40 -04001542 }
Chris Forbesa30de542019-03-18 18:51:55 -07001543 else
Ben Clayton5f7e9112019-04-16 11:03:40 -04001544 {
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04001545 ptr += SIMD::Int(columnStride) * routine->getIntermediate(indexIds[i]).Int(0);
Ben Clayton5f7e9112019-04-16 11:03:40 -04001546 }
Chris Forbesa30de542019-03-18 18:51:55 -07001547 typeId = type.element;
1548 break;
1549 }
1550 case spv::OpTypeVector:
1551 {
Ben Clayton97035bd2019-04-16 11:35:38 -04001552 auto elemStride = (d.InsideMatrix && d.HasRowMajor && d.RowMajor) ? d.MatrixStride : sizeof(float);
Chris Forbesa30de542019-03-18 18:51:55 -07001553 auto & obj = getObject(indexIds[i]);
1554 if (obj.kind == Object::Kind::Constant)
Ben Clayton5f7e9112019-04-16 11:03:40 -04001555 {
Chris Forbesea81ab72019-05-14 15:20:33 -07001556 constantOffset += elemStride * GetConstScalarInt(indexIds[i]);
Ben Clayton5f7e9112019-04-16 11:03:40 -04001557 }
Chris Forbesa30de542019-03-18 18:51:55 -07001558 else
Ben Clayton5f7e9112019-04-16 11:03:40 -04001559 {
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04001560 ptr += SIMD::Int(elemStride) * routine->getIntermediate(indexIds[i]).Int(0);
Ben Clayton5f7e9112019-04-16 11:03:40 -04001561 }
Chris Forbesa30de542019-03-18 18:51:55 -07001562 typeId = type.element;
1563 break;
1564 }
1565 default:
Ben Clayton92797c22019-04-25 10:44:03 +01001566 UNREACHABLE("%s", OpcodeName(type.definition.opcode()).c_str());
Chris Forbesa30de542019-03-18 18:51:55 -07001567 }
1568 }
1569
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04001570 ptr += constantOffset;
Ben Clayton3d497382019-04-08 16:16:12 -04001571 return ptr;
Chris Forbesa30de542019-03-18 18:51:55 -07001572 }
1573
Ben Clayton5f7e9112019-04-16 11:03:40 -04001574 SIMD::Pointer SpirvShader::WalkAccessChain(Object::ID baseId, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const
Chris Forbes38f85b32019-02-12 20:10:05 +00001575 {
Chris Forbes38f85b32019-02-12 20:10:05 +00001576 // TODO: avoid doing per-lane work in some cases if we can?
1577
Nicolas Capens82eb22e2019-04-10 01:15:43 -04001578 auto &baseObject = getObject(baseId);
Ben Claytonaf973b62019-03-13 18:19:20 +00001579 Type::ID typeId = getType(baseObject.type).element;
Chris Forbes38f85b32019-02-12 20:10:05 +00001580
Ben Clayton5f7e9112019-04-16 11:03:40 -04001581 auto ptr = routine->getPointer(baseId);
1582
1583 int constantOffset = 0;
Chris Forbes38f85b32019-02-12 20:10:05 +00001584
1585 for (auto i = 0u; i < numIndexes; i++)
1586 {
1587 auto & type = getType(typeId);
Nicolas Capens29090852019-03-19 16:22:35 -04001588 switch(type.opcode())
Chris Forbes38f85b32019-02-12 20:10:05 +00001589 {
1590 case spv::OpTypeStruct:
1591 {
Chris Forbesea81ab72019-05-14 15:20:33 -07001592 int memberIndex = GetConstScalarInt(indexIds[i]);
Chris Forbes38f85b32019-02-12 20:10:05 +00001593 int offsetIntoStruct = 0;
1594 for (auto j = 0; j < memberIndex; j++) {
Chris Forbes58bee562019-02-19 17:41:41 -08001595 auto memberType = type.definition.word(2u + j);
Ben Clayton97035bd2019-04-16 11:35:38 -04001596 offsetIntoStruct += getType(memberType).sizeInComponents * sizeof(float);
Chris Forbes38f85b32019-02-12 20:10:05 +00001597 }
Chris Forbes6397ed02019-02-15 16:39:17 -08001598 constantOffset += offsetIntoStruct;
Chris Forbes58bee562019-02-19 17:41:41 -08001599 typeId = type.definition.word(2u + memberIndex);
Chris Forbes38f85b32019-02-12 20:10:05 +00001600 break;
1601 }
1602
1603 case spv::OpTypeVector:
1604 case spv::OpTypeMatrix:
1605 case spv::OpTypeArray:
Ben Claytonfa8603c2019-03-08 16:51:42 +00001606 case spv::OpTypeRuntimeArray:
Chris Forbes38f85b32019-02-12 20:10:05 +00001607 {
Ben Claytonfa8603c2019-03-08 16:51:42 +00001608 // TODO: b/127950082: Check bounds.
Chris Forbes0b092cd2019-04-19 09:02:14 -07001609 if (getType(baseObject.type).storageClass == spv::StorageClassUniformConstant)
Ben Clayton5f7e9112019-04-16 11:03:40 -04001610 {
Chris Forbes0b092cd2019-04-19 09:02:14 -07001611 // indexing into an array of descriptors.
1612 auto &obj = getObject(indexIds[i]);
1613 if (obj.kind != Object::Kind::Constant)
1614 {
Ben Clayton92797c22019-04-25 10:44:03 +01001615 UNSUPPORTED("SPIR-V SampledImageArrayDynamicIndexing Capability");
Chris Forbes0b092cd2019-04-19 09:02:14 -07001616 }
1617
1618 auto d = descriptorDecorations.at(baseId);
1619 ASSERT(d.DescriptorSet >= 0);
1620 ASSERT(d.Binding >= 0);
1621 auto setLayout = routine->pipelineLayout->getDescriptorSetLayout(d.DescriptorSet);
1622 auto stride = setLayout->getBindingStride(d.Binding);
Chris Forbesea81ab72019-05-14 15:20:33 -07001623 ptr.base += stride * GetConstScalarInt(indexIds[i]);
Ben Clayton5f7e9112019-04-16 11:03:40 -04001624 }
Chris Forbes38f85b32019-02-12 20:10:05 +00001625 else
Ben Clayton5f7e9112019-04-16 11:03:40 -04001626 {
Chris Forbes0b092cd2019-04-19 09:02:14 -07001627 auto stride = getType(type.element).sizeInComponents * sizeof(float);
1628 auto & obj = getObject(indexIds[i]);
1629 if (obj.kind == Object::Kind::Constant)
1630 {
Chris Forbesea81ab72019-05-14 15:20:33 -07001631 ptr += stride * GetConstScalarInt(indexIds[i]);
Chris Forbes0b092cd2019-04-19 09:02:14 -07001632 }
1633 else
1634 {
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04001635 ptr += SIMD::Int(stride) * routine->getIntermediate(indexIds[i]).Int(0);
Chris Forbes0b092cd2019-04-19 09:02:14 -07001636 }
Ben Clayton5f7e9112019-04-16 11:03:40 -04001637 }
Ben Clayton9a162482019-02-25 11:54:43 +00001638 typeId = type.element;
Chris Forbes38f85b32019-02-12 20:10:05 +00001639 break;
1640 }
1641
1642 default:
Ben Clayton92797c22019-04-25 10:44:03 +01001643 UNREACHABLE("%s", OpcodeName(type.opcode()).c_str());
Chris Forbes38f85b32019-02-12 20:10:05 +00001644 }
1645 }
1646
Ben Clayton5f7e9112019-04-16 11:03:40 -04001647 if (constantOffset != 0)
1648 {
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04001649 ptr += constantOffset;
Ben Clayton5f7e9112019-04-16 11:03:40 -04001650 }
1651 return ptr;
Chris Forbes38f85b32019-02-12 20:10:05 +00001652 }
1653
Ben Claytonaf973b62019-03-13 18:19:20 +00001654 uint32_t SpirvShader::WalkLiteralAccessChain(Type::ID typeId, uint32_t numIndexes, uint32_t const *indexes) const
Chris Forbes9638b942019-02-21 18:39:31 -08001655 {
Ben Clayton97035bd2019-04-16 11:35:38 -04001656 uint32_t componentOffset = 0;
Chris Forbes9638b942019-02-21 18:39:31 -08001657
1658 for (auto i = 0u; i < numIndexes; i++)
1659 {
1660 auto & type = getType(typeId);
Nicolas Capens29090852019-03-19 16:22:35 -04001661 switch(type.opcode())
Chris Forbes9638b942019-02-21 18:39:31 -08001662 {
1663 case spv::OpTypeStruct:
1664 {
1665 int memberIndex = indexes[i];
1666 int offsetIntoStruct = 0;
1667 for (auto j = 0; j < memberIndex; j++) {
1668 auto memberType = type.definition.word(2u + j);
1669 offsetIntoStruct += getType(memberType).sizeInComponents;
1670 }
Ben Clayton97035bd2019-04-16 11:35:38 -04001671 componentOffset += offsetIntoStruct;
Chris Forbes9638b942019-02-21 18:39:31 -08001672 typeId = type.definition.word(2u + memberIndex);
1673 break;
1674 }
1675
1676 case spv::OpTypeVector:
1677 case spv::OpTypeMatrix:
1678 case spv::OpTypeArray:
1679 {
1680 auto elementType = type.definition.word(2);
1681 auto stride = getType(elementType).sizeInComponents;
Ben Clayton97035bd2019-04-16 11:35:38 -04001682 componentOffset += stride * indexes[i];
Chris Forbes9638b942019-02-21 18:39:31 -08001683 typeId = elementType;
1684 break;
1685 }
1686
1687 default:
Ben Clayton92797c22019-04-25 10:44:03 +01001688 UNREACHABLE("%s", OpcodeName(type.opcode()).c_str());
Chris Forbes9638b942019-02-21 18:39:31 -08001689 }
1690 }
1691
Ben Clayton97035bd2019-04-16 11:35:38 -04001692 return componentOffset;
Chris Forbes9638b942019-02-21 18:39:31 -08001693 }
1694
Chris Forbesc25b8072018-12-10 15:10:39 -08001695 void SpirvShader::Decorations::Apply(spv::Decoration decoration, uint32_t arg)
1696 {
1697 switch (decoration)
1698 {
1699 case spv::DecorationLocation:
1700 HasLocation = true;
1701 Location = static_cast<int32_t>(arg);
1702 break;
1703 case spv::DecorationComponent:
1704 HasComponent = true;
1705 Component = arg;
1706 break;
1707 case spv::DecorationBuiltIn:
1708 HasBuiltIn = true;
1709 BuiltIn = static_cast<spv::BuiltIn>(arg);
1710 break;
1711 case spv::DecorationFlat:
1712 Flat = true;
1713 break;
1714 case spv::DecorationNoPerspective:
Chris Forbes5839dcf2018-12-10 19:02:58 -08001715 NoPerspective = true;
Chris Forbesc25b8072018-12-10 15:10:39 -08001716 break;
1717 case spv::DecorationCentroid:
1718 Centroid = true;
1719 break;
1720 case spv::DecorationBlock:
1721 Block = true;
1722 break;
1723 case spv::DecorationBufferBlock:
1724 BufferBlock = true;
1725 break;
Chris Forbes65321072019-03-07 16:13:56 -08001726 case spv::DecorationOffset:
1727 HasOffset = true;
1728 Offset = static_cast<int32_t>(arg);
1729 break;
1730 case spv::DecorationArrayStride:
1731 HasArrayStride = true;
1732 ArrayStride = static_cast<int32_t>(arg);
1733 break;
1734 case spv::DecorationMatrixStride:
1735 HasMatrixStride = true;
1736 MatrixStride = static_cast<int32_t>(arg);
1737 break;
Ben Clayton8448cc52019-04-09 16:24:31 -04001738 case spv::DecorationRelaxedPrecision:
1739 RelaxedPrecision = true;
1740 break;
Chris Forbes1ba5ba72019-04-12 11:37:21 -07001741 case spv::DecorationRowMajor:
1742 HasRowMajor = true;
1743 RowMajor = true;
1744 break;
1745 case spv::DecorationColMajor:
1746 HasRowMajor = true;
1747 RowMajor = false;
Chris Forbesc25b8072018-12-10 15:10:39 -08001748 default:
1749 // Intentionally partial, there are many decorations we just don't care about.
1750 break;
1751 }
1752 }
1753
1754 void SpirvShader::Decorations::Apply(const sw::SpirvShader::Decorations &src)
1755 {
1756 // Apply a decoration group to this set of decorations
1757 if (src.HasBuiltIn)
1758 {
1759 HasBuiltIn = true;
1760 BuiltIn = src.BuiltIn;
1761 }
1762
1763 if (src.HasLocation)
1764 {
1765 HasLocation = true;
1766 Location = src.Location;
1767 }
1768
1769 if (src.HasComponent)
1770 {
1771 HasComponent = true;
1772 Component = src.Component;
1773 }
1774
Chris Forbes65321072019-03-07 16:13:56 -08001775 if (src.HasOffset)
1776 {
1777 HasOffset = true;
1778 Offset = src.Offset;
1779 }
1780
1781 if (src.HasArrayStride)
1782 {
1783 HasArrayStride = true;
1784 ArrayStride = src.ArrayStride;
1785 }
1786
1787 if (src.HasMatrixStride)
1788 {
1789 HasMatrixStride = true;
1790 MatrixStride = src.MatrixStride;
1791 }
1792
Chris Forbes1ba5ba72019-04-12 11:37:21 -07001793 if (src.HasRowMajor)
1794 {
1795 HasRowMajor = true;
1796 RowMajor = src.RowMajor;
1797 }
1798
Chris Forbesc25b8072018-12-10 15:10:39 -08001799 Flat |= src.Flat;
Chris Forbes5839dcf2018-12-10 19:02:58 -08001800 NoPerspective |= src.NoPerspective;
Chris Forbesc25b8072018-12-10 15:10:39 -08001801 Centroid |= src.Centroid;
1802 Block |= src.Block;
1803 BufferBlock |= src.BufferBlock;
Ben Clayton8448cc52019-04-09 16:24:31 -04001804 RelaxedPrecision |= src.RelaxedPrecision;
Chris Forbes98e6b962019-04-12 11:58:58 -07001805 InsideMatrix |= src.InsideMatrix;
Chris Forbesc25b8072018-12-10 15:10:39 -08001806 }
Chris Forbesbc3a0ee2018-12-27 16:02:58 -08001807
Nicolas Capens82eb22e2019-04-10 01:15:43 -04001808 void SpirvShader::DescriptorDecorations::Apply(const sw::SpirvShader::DescriptorDecorations &src)
1809 {
1810 if(src.DescriptorSet >= 0)
1811 {
1812 DescriptorSet = src.DescriptorSet;
1813 }
1814
1815 if(src.Binding >= 0)
1816 {
1817 Binding = src.Binding;
1818 }
Chris Forbes24466042019-04-22 10:54:23 -07001819
1820 if (src.InputAttachmentIndex >= 0)
1821 {
1822 InputAttachmentIndex = src.InputAttachmentIndex;
1823 }
Nicolas Capens82eb22e2019-04-10 01:15:43 -04001824 }
1825
Ben Claytonab51bbf2019-02-20 14:36:27 +00001826 void SpirvShader::ApplyDecorationsForId(Decorations *d, TypeOrObjectID id) const
Chris Forbes49d664d2019-02-12 19:24:50 +00001827 {
1828 auto it = decorations.find(id);
1829 if (it != decorations.end())
1830 d->Apply(it->second);
1831 }
1832
Ben Claytonaf973b62019-03-13 18:19:20 +00001833 void SpirvShader::ApplyDecorationsForIdMember(Decorations *d, Type::ID id, uint32_t member) const
Chris Forbes49d664d2019-02-12 19:24:50 +00001834 {
1835 auto it = memberDecorations.find(id);
1836 if (it != memberDecorations.end() && member < it->second.size())
1837 {
1838 d->Apply(it->second[member]);
1839 }
1840 }
1841
Nicolas Capens82eb22e2019-04-10 01:15:43 -04001842 void SpirvShader::DefineResult(const InsnIterator &insn)
1843 {
1844 Type::ID typeId = insn.word(1);
1845 Object::ID resultId = insn.word(2);
1846 auto &object = defs[resultId];
1847 object.type = typeId;
Chris Forbes0b092cd2019-04-19 09:02:14 -07001848
1849 switch (getType(typeId).opcode())
1850 {
1851 case spv::OpTypePointer:
1852 case spv::OpTypeImage:
1853 case spv::OpTypeSampledImage:
1854 case spv::OpTypeSampler:
Ben Clayton1d514f32019-04-19 16:11:18 -04001855 object.kind = Object::Kind::Pointer;
Chris Forbes0b092cd2019-04-19 09:02:14 -07001856 break;
1857
1858 default:
1859 object.kind = Object::Kind::Intermediate;
1860 }
1861
Nicolas Capens82eb22e2019-04-10 01:15:43 -04001862 object.definition = insn;
1863 }
1864
Chris Forbesd5aed492019-02-02 15:18:52 -08001865 // emit-time
1866
Chris Forbesc61271e2019-02-19 17:01:28 -08001867 void SpirvShader::emitProlog(SpirvRoutine *routine) const
Chris Forbesd5aed492019-02-02 15:18:52 -08001868 {
1869 for (auto insn : *this)
1870 {
1871 switch (insn.opcode())
1872 {
1873 case spv::OpVariable:
1874 {
Nicolas Capens29090852019-03-19 16:22:35 -04001875 Type::ID resultPointerTypeId = insn.word(1);
1876 auto resultPointerType = getType(resultPointerTypeId);
1877 auto pointeeType = getType(resultPointerType.element);
1878
1879 if(pointeeType.sizeInComponents > 0) // TODO: what to do about zero-slot objects?
Chris Forbesd5aed492019-02-02 15:18:52 -08001880 {
Nicolas Capens29090852019-03-19 16:22:35 -04001881 Object::ID resultId = insn.word(2);
Ben Clayton47747612019-04-04 16:27:35 +01001882 routine->createVariable(resultId, pointeeType.sizeInComponents);
Chris Forbesd5aed492019-02-02 15:18:52 -08001883 }
1884 break;
1885 }
Ben Clayton69c37492019-05-13 17:31:16 +01001886 case spv::OpPhi:
1887 {
1888 auto type = getType(insn.word(1));
1889 Object::ID resultId = insn.word(2);
1890 routine->phis.emplace(resultId, SpirvRoutine::Variable(type.sizeInComponents));
1891 break;
1892 }
Chris Forbesd5aed492019-02-02 15:18:52 -08001893 default:
Chris Forbese9f8f5b2019-02-11 00:20:16 +00001894 // Nothing else produces interface variables, so can all be safely ignored.
Chris Forbesd5aed492019-02-02 15:18:52 -08001895 break;
1896 }
1897 }
1898 }
1899
Nicolas Capens09591b82019-04-08 22:51:08 -04001900 void SpirvShader::emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask, const vk::DescriptorSet::Bindings &descriptorSets) const
Chris Forbesd5aed492019-02-02 15:18:52 -08001901 {
Nicolas Capens09591b82019-04-08 22:51:08 -04001902 EmitState state(routine, activeLaneMask, descriptorSets);
Ben Claytonc0cf68b2019-03-21 17:46:08 +00001903
Ben Clayton9b156612019-03-13 19:48:31 +00001904 // Emit everything up to the first label
1905 // TODO: Separate out dispatch of block from non-block instructions?
Chris Forbesd5aed492019-02-02 15:18:52 -08001906 for (auto insn : *this)
1907 {
Ben Clayton9b156612019-03-13 19:48:31 +00001908 if (insn.opcode() == spv::OpLabel)
Chris Forbesd5aed492019-02-02 15:18:52 -08001909 {
Chris Forbesd5aed492019-02-02 15:18:52 -08001910 break;
1911 }
Ben Claytonc0cf68b2019-03-21 17:46:08 +00001912 EmitInstruction(insn, &state);
Ben Clayton9b156612019-03-13 19:48:31 +00001913 }
1914
Ben Clayton60f15ec2019-05-09 17:50:01 +01001915 // Emit all the blocks starting from entryPointBlockId.
1916 EmitBlocks(entryPointBlockId, &state);
Ben Clayton513ed1d2019-03-28 16:07:00 +00001917 }
1918
1919 void SpirvShader::EmitBlocks(Block::ID id, EmitState *state, Block::ID ignore /* = 0 */) const
1920 {
1921 auto oldPending = state->pending;
1922
Ben Claytonc0cf68b2019-03-21 17:46:08 +00001923 std::queue<Block::ID> pending;
Ben Clayton513ed1d2019-03-28 16:07:00 +00001924 state->pending = &pending;
1925 pending.push(id);
Ben Claytonc0cf68b2019-03-21 17:46:08 +00001926 while (pending.size() > 0)
Ben Clayton9b156612019-03-13 19:48:31 +00001927 {
Ben Claytonc0cf68b2019-03-21 17:46:08 +00001928 auto id = pending.front();
1929 pending.pop();
Ben Clayton513ed1d2019-03-28 16:07:00 +00001930
1931 auto const &block = getBlock(id);
1932 if (id == ignore)
Ben Claytonc0cf68b2019-03-21 17:46:08 +00001933 {
Ben Clayton513ed1d2019-03-28 16:07:00 +00001934 continue;
1935 }
1936
1937 state->currentBlock = id;
1938
1939 switch (block.kind)
1940 {
1941 case Block::Simple:
1942 case Block::StructuredBranchConditional:
1943 case Block::UnstructuredBranchConditional:
1944 case Block::StructuredSwitch:
1945 case Block::UnstructuredSwitch:
1946 EmitNonLoop(state);
1947 break;
1948
1949 case Block::Loop:
1950 EmitLoop(state);
1951 break;
1952
1953 default:
1954 UNREACHABLE("Unexpected Block Kind: %d", int(block.kind));
Ben Claytonc0cf68b2019-03-21 17:46:08 +00001955 }
Ben Clayton9b156612019-03-13 19:48:31 +00001956 }
Ben Clayton9b156612019-03-13 19:48:31 +00001957
Ben Clayton513ed1d2019-03-28 16:07:00 +00001958 state->pending = oldPending;
Ben Claytonc0cf68b2019-03-21 17:46:08 +00001959 }
1960
1961 void SpirvShader::EmitInstructions(InsnIterator begin, InsnIterator end, EmitState *state) const
1962 {
1963 for (auto insn = begin; insn != end; insn++)
1964 {
1965 auto res = EmitInstruction(insn, state);
1966 switch (res)
1967 {
1968 case EmitResult::Continue:
1969 continue;
1970 case EmitResult::Terminator:
1971 break;
1972 default:
1973 UNREACHABLE("Unexpected EmitResult %d", int(res));
1974 break;
1975 }
1976 }
1977 }
1978
Ben Clayton513ed1d2019-03-28 16:07:00 +00001979 void SpirvShader::EmitNonLoop(EmitState *state) const
1980 {
1981 auto blockId = state->currentBlock;
1982 auto block = getBlock(blockId);
1983
1984 // Ensure all incoming blocks have been generated.
1985 auto depsDone = true;
1986 for (auto in : block.ins)
1987 {
1988 if (state->visited.count(in) == 0)
1989 {
1990 state->pending->emplace(in);
1991 depsDone = false;
1992 }
1993 }
1994
1995 if (!depsDone)
1996 {
1997 // come back to this once the dependencies have been generated
1998 state->pending->emplace(blockId);
1999 return;
2000 }
2001
2002 if (!state->visited.emplace(blockId).second)
2003 {
2004 return; // Already generated this block.
2005 }
2006
Ben Clayton60f15ec2019-05-09 17:50:01 +01002007 if (blockId != entryPointBlockId)
Ben Clayton513ed1d2019-03-28 16:07:00 +00002008 {
2009 // Set the activeLaneMask.
Nicolas Capens459453a2019-03-27 15:27:27 -04002010 SIMD::Int activeLaneMask(0);
Ben Clayton513ed1d2019-03-28 16:07:00 +00002011 for (auto in : block.ins)
2012 {
2013 auto inMask = GetActiveLaneMaskEdge(state, in, blockId);
Nicolas Capens459453a2019-03-27 15:27:27 -04002014 activeLaneMask |= inMask;
Ben Clayton513ed1d2019-03-28 16:07:00 +00002015 }
Nicolas Capens459453a2019-03-27 15:27:27 -04002016 state->setActiveLaneMask(activeLaneMask);
Ben Clayton513ed1d2019-03-28 16:07:00 +00002017 }
2018
2019 EmitInstructions(block.begin(), block.end(), state);
2020
2021 for (auto out : block.outs)
2022 {
2023 state->pending->emplace(out);
2024 }
2025 }
2026
Ben Claytone747b3c2019-03-21 19:35:15 +00002027 void SpirvShader::EmitLoop(EmitState *state) const
2028 {
2029 auto blockId = state->currentBlock;
2030 auto block = getBlock(blockId);
2031
Ben Clayton513ed1d2019-03-28 16:07:00 +00002032 // Ensure all incoming non-back edge blocks have been generated.
2033 auto depsDone = true;
2034 for (auto in : block.ins)
2035 {
2036 if (state->visited.count(in) == 0)
2037 {
2038 if (!existsPath(blockId, in, block.mergeBlock)) // if not a loop back edge
2039 {
2040 state->pending->emplace(in);
2041 depsDone = false;
2042 }
2043 }
2044 }
2045
2046 if (!depsDone)
2047 {
2048 // come back to this once the dependencies have been generated
2049 state->pending->emplace(blockId);
2050 return;
2051 }
2052
2053 if (!state->visited.emplace(blockId).second)
2054 {
2055 return; // Already emitted this loop.
2056 }
2057
Ben Clayton69c37492019-05-13 17:31:16 +01002058 std::unordered_set<Block::ID> incomingBlocks;
2059 std::unordered_set<Block::ID> loopBlocks;
2060 for (auto in : block.ins)
2061 {
2062 if (!existsPath(blockId, in, block.mergeBlock)) // if not a loop back-edge
2063 {
2064 incomingBlocks.emplace(in);
2065 }
2066 else
2067 {
2068 loopBlocks.emplace(in);
2069 }
2070 }
2071
2072 // Emit the loop phi instructions, and initialize them with a value from
2073 // the incoming blocks.
2074 for (auto insn = block.begin(); insn != block.mergeInstruction; insn++)
2075 {
2076 if (insn.opcode() == spv::OpPhi)
2077 {
2078 StorePhi(insn, state, incomingBlocks);
2079 }
2080 }
2081
Ben Claytone747b3c2019-03-21 19:35:15 +00002082 // loopActiveLaneMask is the mask of lanes that are continuing to loop.
2083 // This is initialized with the incoming active lane masks.
2084 SIMD::Int loopActiveLaneMask = SIMD::Int(0);
Ben Clayton69c37492019-05-13 17:31:16 +01002085 for (auto in : incomingBlocks)
Ben Claytone747b3c2019-03-21 19:35:15 +00002086 {
Ben Clayton69c37492019-05-13 17:31:16 +01002087 loopActiveLaneMask |= GetActiveLaneMaskEdge(state, in, blockId);
Ben Claytone747b3c2019-03-21 19:35:15 +00002088 }
2089
Ben Clayton0e976bc2019-05-13 13:53:49 +01002090 // mergeActiveLaneMasks contains edge lane masks for the merge block.
2091 // This is the union of all edge masks across all iterations of the loop.
2092 std::unordered_map<Block::ID, SIMD::Int> mergeActiveLaneMasks;
2093 for (auto in : getBlock(block.mergeBlock).ins)
2094 {
2095 mergeActiveLaneMasks.emplace(in, SIMD::Int(0));
2096 }
2097
Ben Claytone747b3c2019-03-21 19:35:15 +00002098 // Create the loop basic blocks
2099 auto headerBasicBlock = Nucleus::createBasicBlock();
2100 auto mergeBasicBlock = Nucleus::createBasicBlock();
2101
2102 // Start emitting code inside the loop.
2103 Nucleus::createBr(headerBasicBlock);
2104 Nucleus::setInsertBlock(headerBasicBlock);
2105
Ben Claytone747b3c2019-03-21 19:35:15 +00002106 // Load the active lane mask.
2107 state->setActiveLaneMask(loopActiveLaneMask);
2108
Ben Clayton69c37492019-05-13 17:31:16 +01002109 // Emit the non-phi loop header block's instructions.
Ben Claytone747b3c2019-03-21 19:35:15 +00002110 for (auto insn = block.begin(); insn != block.end(); insn++)
2111 {
Ben Clayton69c37492019-05-13 17:31:16 +01002112 if (insn.opcode() == spv::OpPhi)
2113 {
2114 LoadPhi(insn, state);
2115 }
2116 else
Ben Claytone747b3c2019-03-21 19:35:15 +00002117 {
2118 EmitInstruction(insn, state);
2119 }
2120 }
2121
Ben Clayton513ed1d2019-03-28 16:07:00 +00002122 // Emit all loop blocks, but don't emit the merge block yet.
2123 for (auto out : block.outs)
2124 {
2125 if (existsPath(out, blockId, block.mergeBlock))
2126 {
2127 EmitBlocks(out, state, block.mergeBlock);
2128 }
2129 }
2130
2131 // Rebuild the loopActiveLaneMask from the loop back edges.
Ben Claytone747b3c2019-03-21 19:35:15 +00002132 loopActiveLaneMask = SIMD::Int(0);
2133 for (auto in : block.ins)
2134 {
Ben Clayton513ed1d2019-03-28 16:07:00 +00002135 if (existsPath(blockId, in, block.mergeBlock))
Ben Claytone747b3c2019-03-21 19:35:15 +00002136 {
Ben Claytonfe3f0132019-03-26 11:10:16 +00002137 loopActiveLaneMask |= GetActiveLaneMaskEdge(state, in, blockId);
Ben Claytone747b3c2019-03-21 19:35:15 +00002138 }
2139 }
2140
Ben Clayton0e976bc2019-05-13 13:53:49 +01002141 // Add active lanes to the merge lane mask.
2142 for (auto in : getBlock(block.mergeBlock).ins)
2143 {
2144 auto edge = Block::Edge{in, block.mergeBlock};
2145 auto it = state->edgeActiveLaneMasks.find(edge);
2146 if (it != state->edgeActiveLaneMasks.end())
2147 {
2148 mergeActiveLaneMasks[in] |= it->second;
2149 }
2150 }
2151
Ben Clayton69c37492019-05-13 17:31:16 +01002152 // Update loop phi values.
2153 for (auto insn = block.begin(); insn != block.mergeInstruction; insn++)
Ben Claytone747b3c2019-03-21 19:35:15 +00002154 {
Ben Clayton69c37492019-05-13 17:31:16 +01002155 if (insn.opcode() == spv::OpPhi)
Ben Claytone747b3c2019-03-21 19:35:15 +00002156 {
Ben Clayton69c37492019-05-13 17:31:16 +01002157 StorePhi(insn, state, loopBlocks);
Ben Claytone747b3c2019-03-21 19:35:15 +00002158 }
2159 }
2160
2161 // Loop body now done.
2162 // If any lanes are still active, jump back to the loop header,
2163 // otherwise jump to the merge block.
2164 Nucleus::createCondBr(AnyTrue(loopActiveLaneMask).value, headerBasicBlock, mergeBasicBlock);
2165
Ben Clayton513ed1d2019-03-28 16:07:00 +00002166 // Continue emitting from the merge block.
Ben Claytone747b3c2019-03-21 19:35:15 +00002167 Nucleus::setInsertBlock(mergeBasicBlock);
Ben Clayton513ed1d2019-03-28 16:07:00 +00002168 state->pending->emplace(block.mergeBlock);
Ben Clayton0e976bc2019-05-13 13:53:49 +01002169 for (auto it : mergeActiveLaneMasks)
2170 {
2171 state->addActiveLaneMaskEdge(it.first, block.mergeBlock, it.second);
2172 }
Ben Claytone747b3c2019-03-21 19:35:15 +00002173 }
2174
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002175 SpirvShader::EmitResult SpirvShader::EmitInstruction(InsnIterator insn, EmitState *state) const
Ben Clayton9b156612019-03-13 19:48:31 +00002176 {
Nicolas Capens82eb22e2019-04-10 01:15:43 -04002177 auto opcode = insn.opcode();
2178
2179 switch (opcode)
Ben Clayton9b156612019-03-13 19:48:31 +00002180 {
2181 case spv::OpTypeVoid:
2182 case spv::OpTypeInt:
2183 case spv::OpTypeFloat:
2184 case spv::OpTypeBool:
2185 case spv::OpTypeVector:
2186 case spv::OpTypeArray:
2187 case spv::OpTypeRuntimeArray:
2188 case spv::OpTypeMatrix:
2189 case spv::OpTypeStruct:
2190 case spv::OpTypePointer:
2191 case spv::OpTypeFunction:
Nicolas Capens7d867272019-04-08 22:51:08 -04002192 case spv::OpTypeImage:
2193 case spv::OpTypeSampledImage:
Chris Forbesfa82c342019-04-26 16:42:38 -07002194 case spv::OpTypeSampler:
Ben Clayton9b156612019-03-13 19:48:31 +00002195 case spv::OpExecutionMode:
2196 case spv::OpMemoryModel:
2197 case spv::OpFunction:
2198 case spv::OpFunctionEnd:
2199 case spv::OpConstant:
2200 case spv::OpConstantNull:
2201 case spv::OpConstantTrue:
2202 case spv::OpConstantFalse:
2203 case spv::OpConstantComposite:
Chris Forbesea81ab72019-05-14 15:20:33 -07002204 case spv::OpSpecConstant:
2205 case spv::OpSpecConstantTrue:
2206 case spv::OpSpecConstantFalse:
2207 case spv::OpSpecConstantComposite:
2208 case spv::OpSpecConstantOp:
Chris Forbes0e712412019-03-18 19:31:16 -07002209 case spv::OpUndef:
Ben Clayton9b156612019-03-13 19:48:31 +00002210 case spv::OpExtension:
2211 case spv::OpCapability:
2212 case spv::OpEntryPoint:
2213 case spv::OpExtInstImport:
2214 case spv::OpDecorate:
2215 case spv::OpMemberDecorate:
2216 case spv::OpGroupDecorate:
2217 case spv::OpGroupMemberDecorate:
2218 case spv::OpDecorationGroup:
2219 case spv::OpName:
2220 case spv::OpMemberName:
2221 case spv::OpSource:
2222 case spv::OpSourceContinued:
2223 case spv::OpSourceExtension:
2224 case spv::OpLine:
2225 case spv::OpNoLine:
2226 case spv::OpModuleProcessed:
2227 case spv::OpString:
2228 // Nothing to do at emit time. These are either fully handled at analysis time,
2229 // or don't require any work at all.
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002230 return EmitResult::Continue;
Ben Clayton9b156612019-03-13 19:48:31 +00002231
2232 case spv::OpLabel:
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002233 return EmitResult::Continue;
Ben Clayton9b156612019-03-13 19:48:31 +00002234
2235 case spv::OpVariable:
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002236 return EmitVariable(insn, state);
Ben Clayton9b156612019-03-13 19:48:31 +00002237
2238 case spv::OpLoad:
Nicolas Capens5e8414e2019-03-19 16:22:35 -04002239 case spv::OpAtomicLoad:
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002240 return EmitLoad(insn, state);
Ben Clayton9b156612019-03-13 19:48:31 +00002241
2242 case spv::OpStore:
Nicolas Capens5e8414e2019-03-19 16:22:35 -04002243 case spv::OpAtomicStore:
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002244 return EmitStore(insn, state);
Ben Clayton9b156612019-03-13 19:48:31 +00002245
Chris Forbes17813932019-04-18 11:45:54 -07002246 case spv::OpAtomicIAdd:
Chris Forbes707ed992019-04-18 18:17:35 -07002247 case spv::OpAtomicISub:
Chris Forbes17813932019-04-18 11:45:54 -07002248 case spv::OpAtomicSMin:
2249 case spv::OpAtomicSMax:
2250 case spv::OpAtomicUMin:
2251 case spv::OpAtomicUMax:
2252 case spv::OpAtomicAnd:
2253 case spv::OpAtomicOr:
2254 case spv::OpAtomicXor:
Chris Forbes707ed992019-04-18 18:17:35 -07002255 case spv::OpAtomicIIncrement:
2256 case spv::OpAtomicIDecrement:
Chris Forbes17813932019-04-18 11:45:54 -07002257 case spv::OpAtomicExchange:
2258 return EmitAtomicOp(insn, state);
2259
Chris Forbesa16238d2019-04-18 16:31:54 -07002260 case spv::OpAtomicCompareExchange:
2261 return EmitAtomicCompareExchange(insn, state);
2262
Ben Clayton9b156612019-03-13 19:48:31 +00002263 case spv::OpAccessChain:
Chris Forbes10fd6242019-03-15 12:27:34 -07002264 case spv::OpInBoundsAccessChain:
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002265 return EmitAccessChain(insn, state);
Ben Clayton9b156612019-03-13 19:48:31 +00002266
2267 case spv::OpCompositeConstruct:
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002268 return EmitCompositeConstruct(insn, state);
Ben Clayton9b156612019-03-13 19:48:31 +00002269
2270 case spv::OpCompositeInsert:
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002271 return EmitCompositeInsert(insn, state);
Ben Clayton9b156612019-03-13 19:48:31 +00002272
2273 case spv::OpCompositeExtract:
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002274 return EmitCompositeExtract(insn, state);
Ben Clayton9b156612019-03-13 19:48:31 +00002275
2276 case spv::OpVectorShuffle:
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002277 return EmitVectorShuffle(insn, state);
Ben Clayton9b156612019-03-13 19:48:31 +00002278
Chris Forbesfaed9d32019-03-15 10:31:08 -07002279 case spv::OpVectorExtractDynamic:
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002280 return EmitVectorExtractDynamic(insn, state);
Chris Forbesfaed9d32019-03-15 10:31:08 -07002281
2282 case spv::OpVectorInsertDynamic:
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002283 return EmitVectorInsertDynamic(insn, state);
Chris Forbesfaed9d32019-03-15 10:31:08 -07002284
Ben Clayton9b156612019-03-13 19:48:31 +00002285 case spv::OpVectorTimesScalar:
Chris Forbes57e05b82019-03-28 09:16:20 +13002286 case spv::OpMatrixTimesScalar:
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002287 return EmitVectorTimesScalar(insn, state);
Ben Clayton9b156612019-03-13 19:48:31 +00002288
Chris Forbes06f4ed72019-03-28 09:53:20 +13002289 case spv::OpMatrixTimesVector:
2290 return EmitMatrixTimesVector(insn, state);
2291
Chris Forbesa563dd82019-03-28 10:32:55 +13002292 case spv::OpVectorTimesMatrix:
2293 return EmitVectorTimesMatrix(insn, state);
2294
Chris Forbes51562f12019-03-28 19:08:39 -07002295 case spv::OpMatrixTimesMatrix:
2296 return EmitMatrixTimesMatrix(insn, state);
2297
Ben Clayton3ee52992019-04-08 11:01:23 -04002298 case spv::OpOuterProduct:
2299 return EmitOuterProduct(insn, state);
2300
Ben Clayton620f7082019-04-08 11:12:08 -04002301 case spv::OpTranspose:
2302 return EmitTranspose(insn, state);
2303
Ben Clayton9b156612019-03-13 19:48:31 +00002304 case spv::OpNot:
Ben Claytonb5bfa502019-04-08 14:26:36 -04002305 case spv::OpBitFieldInsert:
Ben Claytond86db952019-04-08 13:43:11 -04002306 case spv::OpBitFieldSExtract:
2307 case spv::OpBitFieldUExtract:
Ben Claytond2a46432019-04-08 11:41:45 -04002308 case spv::OpBitReverse:
Ben Clayton1eb017d2019-04-08 11:32:09 -04002309 case spv::OpBitCount:
Ben Clayton9b156612019-03-13 19:48:31 +00002310 case spv::OpSNegate:
2311 case spv::OpFNegate:
2312 case spv::OpLogicalNot:
2313 case spv::OpConvertFToU:
2314 case spv::OpConvertFToS:
2315 case spv::OpConvertSToF:
2316 case spv::OpConvertUToF:
2317 case spv::OpBitcast:
2318 case spv::OpIsInf:
2319 case spv::OpIsNan:
Chris Forbesaff2dd02019-03-20 14:50:24 -07002320 case spv::OpDPdx:
2321 case spv::OpDPdxCoarse:
2322 case spv::OpDPdy:
2323 case spv::OpDPdyCoarse:
2324 case spv::OpFwidth:
2325 case spv::OpFwidthCoarse:
2326 case spv::OpDPdxFine:
2327 case spv::OpDPdyFine:
2328 case spv::OpFwidthFine:
Ben Clayton64da4ae2019-04-19 12:34:06 -04002329 case spv::OpQuantizeToF16:
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002330 return EmitUnaryOp(insn, state);
Ben Clayton9b156612019-03-13 19:48:31 +00002331
2332 case spv::OpIAdd:
2333 case spv::OpISub:
2334 case spv::OpIMul:
2335 case spv::OpSDiv:
2336 case spv::OpUDiv:
2337 case spv::OpFAdd:
2338 case spv::OpFSub:
2339 case spv::OpFMul:
2340 case spv::OpFDiv:
Chris Forbes0e4d6ff2019-03-15 13:43:36 -07002341 case spv::OpFMod:
Chris Forbes1a4c7122019-03-15 14:50:47 -07002342 case spv::OpFRem:
Ben Clayton9b156612019-03-13 19:48:31 +00002343 case spv::OpFOrdEqual:
2344 case spv::OpFUnordEqual:
2345 case spv::OpFOrdNotEqual:
2346 case spv::OpFUnordNotEqual:
2347 case spv::OpFOrdLessThan:
2348 case spv::OpFUnordLessThan:
2349 case spv::OpFOrdGreaterThan:
2350 case spv::OpFUnordGreaterThan:
2351 case spv::OpFOrdLessThanEqual:
2352 case spv::OpFUnordLessThanEqual:
2353 case spv::OpFOrdGreaterThanEqual:
2354 case spv::OpFUnordGreaterThanEqual:
2355 case spv::OpSMod:
Chris Forbes71673c82019-03-14 12:55:20 -07002356 case spv::OpSRem:
Ben Clayton9b156612019-03-13 19:48:31 +00002357 case spv::OpUMod:
2358 case spv::OpIEqual:
2359 case spv::OpINotEqual:
2360 case spv::OpUGreaterThan:
2361 case spv::OpSGreaterThan:
2362 case spv::OpUGreaterThanEqual:
2363 case spv::OpSGreaterThanEqual:
2364 case spv::OpULessThan:
2365 case spv::OpSLessThan:
2366 case spv::OpULessThanEqual:
2367 case spv::OpSLessThanEqual:
2368 case spv::OpShiftRightLogical:
2369 case spv::OpShiftRightArithmetic:
2370 case spv::OpShiftLeftLogical:
2371 case spv::OpBitwiseOr:
2372 case spv::OpBitwiseXor:
2373 case spv::OpBitwiseAnd:
2374 case spv::OpLogicalOr:
2375 case spv::OpLogicalAnd:
2376 case spv::OpLogicalEqual:
2377 case spv::OpLogicalNotEqual:
2378 case spv::OpUMulExtended:
2379 case spv::OpSMulExtended:
Chris Forbes3e6f60b2019-05-08 17:28:10 -07002380 case spv::OpIAddCarry:
2381 case spv::OpISubBorrow:
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002382 return EmitBinaryOp(insn, state);
Ben Clayton9b156612019-03-13 19:48:31 +00002383
2384 case spv::OpDot:
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002385 return EmitDot(insn, state);
Ben Clayton9b156612019-03-13 19:48:31 +00002386
2387 case spv::OpSelect:
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002388 return EmitSelect(insn, state);
Ben Clayton9b156612019-03-13 19:48:31 +00002389
2390 case spv::OpExtInst:
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002391 return EmitExtendedInstruction(insn, state);
Ben Clayton9b156612019-03-13 19:48:31 +00002392
2393 case spv::OpAny:
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002394 return EmitAny(insn, state);
Ben Clayton9b156612019-03-13 19:48:31 +00002395
2396 case spv::OpAll:
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002397 return EmitAll(insn, state);
Ben Clayton9b156612019-03-13 19:48:31 +00002398
Ben Claytone37ce612019-03-13 19:57:42 +00002399 case spv::OpBranch:
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002400 return EmitBranch(insn, state);
Ben Claytone37ce612019-03-13 19:57:42 +00002401
Ben Clayton9fd02e02019-03-21 18:47:15 +00002402 case spv::OpPhi:
2403 return EmitPhi(insn, state);
2404
2405 case spv::OpSelectionMerge:
Ben Claytone747b3c2019-03-21 19:35:15 +00002406 case spv::OpLoopMerge:
Ben Clayton9fd02e02019-03-21 18:47:15 +00002407 return EmitResult::Continue;
2408
2409 case spv::OpBranchConditional:
2410 return EmitBranchConditional(insn, state);
2411
Ben Clayton213a8ce2019-03-21 18:57:23 +00002412 case spv::OpSwitch:
2413 return EmitSwitch(insn, state);
2414
Ben Clayton9fd02e02019-03-21 18:47:15 +00002415 case spv::OpUnreachable:
2416 return EmitUnreachable(insn, state);
2417
2418 case spv::OpReturn:
2419 return EmitReturn(insn, state);
2420
Chris Forbes97e95892019-04-02 13:37:37 +13002421 case spv::OpKill:
2422 return EmitKill(insn, state);
2423
Nicolas Capens7d867272019-04-08 22:51:08 -04002424 case spv::OpImageSampleImplicitLod:
Nicolas Capens5b09dd12019-04-30 01:05:28 -04002425 return EmitImageSampleImplicitLod(None, insn, state);
Chris Forbesb0d00ea2019-04-17 20:24:20 -07002426
Nicolas Capens125dba02019-04-24 02:03:22 -04002427 case spv::OpImageSampleExplicitLod:
Nicolas Capens5b09dd12019-04-30 01:05:28 -04002428 return EmitImageSampleExplicitLod(None, insn, state);
2429
2430 case spv::OpImageSampleDrefImplicitLod:
2431 return EmitImageSampleImplicitLod(Dref, insn, state);
2432
2433 case spv::OpImageSampleDrefExplicitLod:
2434 return EmitImageSampleExplicitLod(Dref, insn, state);
2435
2436 case spv::OpImageSampleProjImplicitLod:
2437 return EmitImageSampleImplicitLod(Proj, insn, state);
2438
2439 case spv::OpImageSampleProjExplicitLod:
2440 return EmitImageSampleExplicitLod(Proj, insn, state);
2441
2442 case spv::OpImageSampleProjDrefImplicitLod:
2443 return EmitImageSampleImplicitLod(ProjDref, insn, state);
2444
2445 case spv::OpImageSampleProjDrefExplicitLod:
2446 return EmitImageSampleExplicitLod(ProjDref, insn, state);
Nicolas Capens125dba02019-04-24 02:03:22 -04002447
Chris Forbescd631592019-04-27 10:37:18 -07002448 case spv::OpImageFetch:
2449 return EmitImageFetch(insn, state);
2450
Chris Forbesb0d00ea2019-04-17 20:24:20 -07002451 case spv::OpImageQuerySize:
2452 return EmitImageQuerySize(insn, state);
2453
Ben Clayton0264d8e2019-05-08 15:39:40 +01002454 case spv::OpImageQuerySizeLod:
2455 return EmitImageQuerySizeLod(insn, state);
2456
Ben Claytonb4001ed2019-05-10 10:21:00 +01002457 case spv::OpImageQueryLevels:
2458 return EmitImageQueryLevels(insn, state);
2459
Ben Clayton2568cf72019-05-10 11:53:14 +01002460 case spv::OpImageQuerySamples:
2461 return EmitImageQuerySamples(insn, state);
2462
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07002463 case spv::OpImageRead:
2464 return EmitImageRead(insn, state);
2465
Chris Forbes179f0142019-04-17 20:24:44 -07002466 case spv::OpImageWrite:
2467 return EmitImageWrite(insn, state);
2468
Chris Forbesb51f2c12019-04-18 11:01:30 -07002469 case spv::OpImageTexelPointer:
2470 return EmitImageTexelPointer(insn, state);
2471
Chris Forbesfa82c342019-04-26 16:42:38 -07002472 case spv::OpSampledImage:
2473 case spv::OpImage:
2474 return EmitSampledImageCombineOrSplit(insn, state);
2475
Ben Clayton78abf372019-05-09 15:11:58 +01002476 case spv::OpCopyObject:
2477 return EmitCopyObject(insn, state);
2478
Ben Claytonb5a45462019-04-30 19:21:29 +01002479 case spv::OpCopyMemory:
2480 return EmitCopyMemory(insn, state);
2481
Ben Claytonecfeede2019-05-08 08:51:01 +01002482 case spv::OpControlBarrier:
2483 return EmitControlBarrier(insn, state);
2484
Ben Claytonb16c5862019-05-08 14:01:38 +01002485 case spv::OpMemoryBarrier:
2486 return EmitMemoryBarrier(insn, state);
2487
Ben Clayton32d47972019-04-19 17:08:15 -04002488 case spv::OpGroupNonUniformElect:
2489 return EmitGroupNonUniform(insn, state);
2490
Ben Claytone4605da2019-05-09 16:24:01 +01002491 case spv::OpArrayLength:
2492 return EmitArrayLength(insn, state);
2493
Ben Clayton9b156612019-03-13 19:48:31 +00002494 default:
Ben Clayton92797c22019-04-25 10:44:03 +01002495 UNREACHABLE("%s", OpcodeName(opcode).c_str());
Ben Clayton9b156612019-03-13 19:48:31 +00002496 break;
Chris Forbesd5aed492019-02-02 15:18:52 -08002497 }
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002498
2499 return EmitResult::Continue;
Chris Forbesd5aed492019-02-02 15:18:52 -08002500 }
Chris Forbesc61271e2019-02-19 17:01:28 -08002501
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002502 SpirvShader::EmitResult SpirvShader::EmitVariable(InsnIterator insn, EmitState *state) const
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002503 {
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002504 auto routine = state->routine;
Ben Claytonaf973b62019-03-13 18:19:20 +00002505 Object::ID resultId = insn.word(2);
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002506 auto &object = getObject(resultId);
2507 auto &objectTy = getType(object.type);
Ben Clayton484e08e2019-04-05 12:11:39 +01002508
Ben Claytonefec1b92019-03-05 17:38:16 +00002509 switch (objectTy.storageClass)
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002510 {
Ben Clayton484e08e2019-04-05 12:11:39 +01002511 case spv::StorageClassOutput:
2512 case spv::StorageClassPrivate:
2513 case spv::StorageClassFunction:
2514 {
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04002515 ASSERT(objectTy.opcode() == spv::OpTypePointer);
Ben Clayton5f7e9112019-04-16 11:03:40 -04002516 auto base = &routine->getVariable(resultId)[0];
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04002517 auto elementTy = getType(objectTy.element);
2518 auto size = elementTy.sizeInComponents * sizeof(float) * SIMD::Width;
2519 routine->createPointer(resultId, SIMD::Pointer(base, size));
Ben Clayton484e08e2019-04-05 12:11:39 +01002520 break;
2521 }
Ben Claytonecd38482019-04-19 17:11:08 -04002522 case spv::StorageClassWorkgroup:
2523 {
2524 ASSERT(objectTy.opcode() == spv::OpTypePointer);
2525 auto base = &routine->workgroupMemory[0];
2526 auto size = workgroupMemory.size();
2527 routine->createPointer(resultId, SIMD::Pointer(base, size, workgroupMemory.offsetOf(resultId)));
2528 break;
2529 }
Ben Claytonefec1b92019-03-05 17:38:16 +00002530 case spv::StorageClassInput:
2531 {
2532 if (object.kind == Object::Kind::InterfaceVariable)
2533 {
Ben Clayton47747612019-04-04 16:27:35 +01002534 auto &dst = routine->getVariable(resultId);
Ben Claytonefec1b92019-03-05 17:38:16 +00002535 int offset = 0;
2536 VisitInterface(resultId,
2537 [&](Decorations const &d, AttribType type) {
2538 auto scalarSlot = d.Location << 2 | d.Component;
2539 dst[offset++] = routine->inputs[scalarSlot];
2540 });
2541 }
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04002542 ASSERT(objectTy.opcode() == spv::OpTypePointer);
Ben Clayton5f7e9112019-04-16 11:03:40 -04002543 auto base = &routine->getVariable(resultId)[0];
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04002544 auto elementTy = getType(objectTy.element);
2545 auto size = elementTy.sizeInComponents * sizeof(float) * SIMD::Width;
2546 routine->createPointer(resultId, SIMD::Pointer(base, size));
Ben Claytonefec1b92019-03-05 17:38:16 +00002547 break;
2548 }
Nicolas Capens7d867272019-04-08 22:51:08 -04002549 case spv::StorageClassUniformConstant:
2550 {
2551 const auto &d = descriptorDecorations.at(resultId);
2552 ASSERT(d.DescriptorSet >= 0);
2553 ASSERT(d.Binding >= 0);
2554
2555 uint32_t arrayIndex = 0; // TODO(b/129523279)
2556 auto setLayout = routine->pipelineLayout->getDescriptorSetLayout(d.DescriptorSet);
Ben Clayton8c56e8d2019-04-25 08:24:01 +01002557 if (setLayout->hasBinding(d.Binding))
2558 {
2559 size_t bindingOffset = setLayout->getBindingOffset(d.Binding, arrayIndex);
2560 Pointer<Byte> set = routine->descriptorSets[d.DescriptorSet]; // DescriptorSet*
2561 Pointer<Byte> binding = Pointer<Byte>(set + bindingOffset); // vk::SampledImageDescriptor*
2562 auto size = 0; // Not required as this pointer is not directly used by SIMD::Read or SIMD::Write.
2563 routine->createPointer(resultId, SIMD::Pointer(binding, size));
2564 }
2565 else
2566 {
2567 // TODO: Error if the variable with the non-existant binding is
2568 // used? Or perhaps strip these unused variable declarations as
2569 // a preprocess on the SPIR-V?
2570 }
Nicolas Capens7d867272019-04-08 22:51:08 -04002571 break;
2572 }
Ben Claytonefec1b92019-03-05 17:38:16 +00002573 case spv::StorageClassUniform:
2574 case spv::StorageClassStorageBuffer:
2575 {
Nicolas Capens82eb22e2019-04-10 01:15:43 -04002576 const auto &d = descriptorDecorations.at(resultId);
2577 ASSERT(d.DescriptorSet >= 0 && d.DescriptorSet < vk::MAX_BOUND_DESCRIPTOR_SETS);
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04002578 auto size = 0; // Not required as this pointer is not directly used by SIMD::Read or SIMD::Write.
2579 routine->createPointer(resultId, SIMD::Pointer(routine->descriptorSets[d.DescriptorSet], size));
Ben Claytonefec1b92019-03-05 17:38:16 +00002580 break;
2581 }
Chris Forbesa30de542019-03-18 18:51:55 -07002582 case spv::StorageClassPushConstant:
2583 {
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04002584 routine->createPointer(resultId, SIMD::Pointer(routine->pushConstants, vk::MAX_PUSH_CONSTANT_SIZE));
Chris Forbesa30de542019-03-18 18:51:55 -07002585 break;
2586 }
Ben Claytonefec1b92019-03-05 17:38:16 +00002587 default:
Ben Clayton92797c22019-04-25 10:44:03 +01002588 UNREACHABLE("Storage class %d", objectTy.storageClass);
Ben Claytonefec1b92019-03-05 17:38:16 +00002589 break;
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002590 }
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002591
Ben Clayton05f27a32019-04-30 16:15:25 +01002592 if (insn.wordCount() > 4)
2593 {
2594 Object::ID initializerId = insn.word(4);
2595 if (getObject(initializerId).kind != Object::Kind::Constant)
2596 {
2597 UNIMPLEMENTED("Non-constant initializers not yet implemented");
2598 }
2599 switch (objectTy.storageClass)
2600 {
2601 case spv::StorageClassOutput:
2602 case spv::StorageClassPrivate:
2603 case spv::StorageClassFunction:
2604 {
2605 bool interleavedByLane = IsStorageInterleavedByLane(objectTy.storageClass);
Ben Clayton44741082019-05-10 11:03:14 +01002606 auto ptr = GetPointerToData(resultId, 0, routine);
Ben Clayton05f27a32019-04-30 16:15:25 +01002607 GenericValue initialValue(this, routine, initializerId);
2608 VisitMemoryObject(resultId, [&](uint32_t i, uint32_t offset)
2609 {
2610 auto p = ptr + offset;
2611 if (interleavedByLane) { p = interleaveByLane(p); }
2612 SIMD::Store(p, initialValue.Float(i), state->activeLaneMask());
2613 });
2614 break;
2615 }
2616 default:
2617 ASSERT_MSG(initializerId == 0, "Vulkan does not permit variables of storage class %d to have initializers", int(objectTy.storageClass));
2618 }
2619 }
2620
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002621 return EmitResult::Continue;
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002622 }
2623
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002624 SpirvShader::EmitResult SpirvShader::EmitLoad(InsnIterator insn, EmitState *state) const
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002625 {
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002626 auto routine = state->routine;
Nicolas Capens86509d92019-03-21 13:23:50 -04002627 bool atomic = (insn.opcode() == spv::OpAtomicLoad);
Nicolas Capensfabdec52019-03-21 17:04:05 -04002628 Object::ID resultId = insn.word(2);
Ben Claytonaf973b62019-03-13 18:19:20 +00002629 Object::ID pointerId = insn.word(3);
Nicolas Capensfabdec52019-03-21 17:04:05 -04002630 auto &result = getObject(resultId);
2631 auto &resultTy = getType(result.type);
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002632 auto &pointer = getObject(pointerId);
Ben Clayton484e08e2019-04-05 12:11:39 +01002633 auto &pointerTy = getType(pointer.type);
Nicolas Capens86509d92019-03-21 13:23:50 -04002634 std::memory_order memoryOrder = std::memory_order_relaxed;
2635
Nicolas Capens82eb22e2019-04-10 01:15:43 -04002636 ASSERT(getType(pointer.type).element == result.type);
2637 ASSERT(Type::ID(insn.word(1)) == result.type);
2638 ASSERT(!atomic || getType(getType(pointer.type).element).opcode() == spv::OpTypeInt); // Vulkan 1.1: "Atomic instructions must declare a scalar 32-bit integer type, for the value pointed to by Pointer."
2639
Chris Forbes0b092cd2019-04-19 09:02:14 -07002640 if(pointerTy.storageClass == spv::StorageClassUniformConstant)
Nicolas Capens7d867272019-04-08 22:51:08 -04002641 {
2642 // Just propagate the pointer.
Nicolas Capens7d867272019-04-08 22:51:08 -04002643 auto &ptr = routine->getPointer(pointerId);
2644 routine->createPointer(resultId, ptr);
Nicolas Capens7d867272019-04-08 22:51:08 -04002645 return EmitResult::Continue;
2646 }
2647
Nicolas Capens86509d92019-03-21 13:23:50 -04002648 if(atomic)
2649 {
2650 Object::ID semanticsId = insn.word(5);
2651 auto memorySemantics = static_cast<spv::MemorySemanticsMask>(getObject(semanticsId).constantValue[0]);
2652 memoryOrder = MemoryOrder(memorySemantics);
2653 }
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002654
Ben Clayton484e08e2019-04-05 12:11:39 +01002655 if (pointerTy.storageClass == spv::StorageClassImage)
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002656 {
Ben Claytonefec1b92019-03-05 17:38:16 +00002657 UNIMPLEMENTED("StorageClassImage load not yet implemented");
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002658 }
2659
Ben Clayton3d497382019-04-08 16:16:12 -04002660 auto ptr = GetPointerToData(pointerId, 0, routine);
Ben Clayton831db962019-02-27 14:57:18 +00002661
Ben Clayton484e08e2019-04-05 12:11:39 +01002662 bool interleavedByLane = IsStorageInterleavedByLane(pointerTy.storageClass);
Ben Clayton49d81582019-03-12 20:05:04 +00002663
Nicolas Capensfabdec52019-03-21 17:04:05 -04002664 auto &dst = routine->createIntermediate(resultId, resultTy.sizeInComponents);
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04002665
2666 VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t offset)
Ben Clayton831db962019-02-27 14:57:18 +00002667 {
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04002668 auto p = ptr + offset;
2669 if (interleavedByLane) { p = interleaveByLane(p); }
2670 dst.move(i, SIMD::Load<SIMD::Float>(p, state->activeLaneMask(), atomic, memoryOrder));
2671 });
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002672
2673 return EmitResult::Continue;
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002674 }
2675
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002676 SpirvShader::EmitResult SpirvShader::EmitStore(InsnIterator insn, EmitState *state) const
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002677 {
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002678 auto routine = state->routine;
Nicolas Capens5e8414e2019-03-19 16:22:35 -04002679 bool atomic = (insn.opcode() == spv::OpAtomicStore);
Ben Claytonaf973b62019-03-13 18:19:20 +00002680 Object::ID pointerId = insn.word(1);
Nicolas Capens5e8414e2019-03-19 16:22:35 -04002681 Object::ID objectId = insn.word(atomic ? 4 : 2);
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002682 auto &object = getObject(objectId);
2683 auto &pointer = getObject(pointerId);
2684 auto &pointerTy = getType(pointer.type);
2685 auto &elementTy = getType(pointerTy.element);
Nicolas Capens86509d92019-03-21 13:23:50 -04002686 std::memory_order memoryOrder = std::memory_order_relaxed;
2687
2688 if(atomic)
2689 {
2690 Object::ID semanticsId = insn.word(3);
2691 auto memorySemantics = static_cast<spv::MemorySemanticsMask>(getObject(semanticsId).constantValue[0]);
2692 memoryOrder = MemoryOrder(memorySemantics);
2693 }
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002694
Nicolas Capens5e8414e2019-03-19 16:22:35 -04002695 ASSERT(!atomic || elementTy.opcode() == spv::OpTypeInt); // Vulkan 1.1: "Atomic instructions must declare a scalar 32-bit integer type, for the value pointed to by Pointer."
2696
Ben Clayton484e08e2019-04-05 12:11:39 +01002697 if (pointerTy.storageClass == spv::StorageClassImage)
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002698 {
Ben Claytonefec1b92019-03-05 17:38:16 +00002699 UNIMPLEMENTED("StorageClassImage store not yet implemented");
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002700 }
2701
Ben Clayton3d497382019-04-08 16:16:12 -04002702 auto ptr = GetPointerToData(pointerId, 0, routine);
Ben Clayton484e08e2019-04-05 12:11:39 +01002703 bool interleavedByLane = IsStorageInterleavedByLane(pointerTy.storageClass);
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002704
2705 if (object.kind == Object::Kind::Constant)
2706 {
Ben Clayton49d81582019-03-12 20:05:04 +00002707 // Constant source data.
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002708 auto src = reinterpret_cast<float *>(object.constantValue.get());
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04002709 VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t offset)
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002710 {
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04002711 auto p = ptr + offset;
2712 if (interleavedByLane) { p = interleaveByLane(p); }
2713 SIMD::Store(p, SIMD::Float(src[i]), state->activeLaneMask(), atomic, memoryOrder);
2714 });
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002715 }
2716 else
2717 {
Ben Clayton49d81582019-03-12 20:05:04 +00002718 // Intermediate source data.
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002719 auto &src = routine->getIntermediate(objectId);
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04002720 VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t offset)
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002721 {
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04002722 auto p = ptr + offset;
2723 if (interleavedByLane) { p = interleaveByLane(p); }
2724 SIMD::Store(p, src.Float(i), state->activeLaneMask(), atomic, memoryOrder);
2725 });
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002726 }
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002727
2728 return EmitResult::Continue;
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002729 }
2730
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002731 SpirvShader::EmitResult SpirvShader::EmitAccessChain(InsnIterator insn, EmitState *state) const
Nicolas Capensfabdec52019-03-21 17:04:05 -04002732 {
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002733 auto routine = state->routine;
Nicolas Capensfabdec52019-03-21 17:04:05 -04002734 Type::ID typeId = insn.word(1);
2735 Object::ID resultId = insn.word(2);
2736 Object::ID baseId = insn.word(3);
2737 uint32_t numIndexes = insn.wordCount() - 4;
2738 const uint32_t *indexes = insn.wordPointer(4);
2739 auto &type = getType(typeId);
2740 ASSERT(type.sizeInComponents == 1);
Ben Clayton1d514f32019-04-19 16:11:18 -04002741 ASSERT(getObject(resultId).kind == Object::Kind::Pointer);
Nicolas Capensfabdec52019-03-21 17:04:05 -04002742
2743 if(type.storageClass == spv::StorageClassPushConstant ||
2744 type.storageClass == spv::StorageClassUniform ||
2745 type.storageClass == spv::StorageClassStorageBuffer)
2746 {
Ben Clayton3d497382019-04-08 16:16:12 -04002747 auto ptr = WalkExplicitLayoutAccessChain(baseId, numIndexes, indexes, routine);
Ben Clayton5f7e9112019-04-16 11:03:40 -04002748 routine->createPointer(resultId, ptr);
Nicolas Capensfabdec52019-03-21 17:04:05 -04002749 }
2750 else
2751 {
Ben Clayton5f7e9112019-04-16 11:03:40 -04002752 auto ptr = WalkAccessChain(baseId, numIndexes, indexes, routine);
2753 routine->createPointer(resultId, ptr);
Nicolas Capensfabdec52019-03-21 17:04:05 -04002754 }
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002755
2756 return EmitResult::Continue;
Nicolas Capensfabdec52019-03-21 17:04:05 -04002757 }
2758
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002759 SpirvShader::EmitResult SpirvShader::EmitCompositeConstruct(InsnIterator insn, EmitState *state) const
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002760 {
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002761 auto routine = state->routine;
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002762 auto &type = getType(insn.word(1));
2763 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2764 auto offset = 0u;
2765
2766 for (auto i = 0u; i < insn.wordCount() - 3; i++)
2767 {
Ben Claytonaf973b62019-03-13 18:19:20 +00002768 Object::ID srcObjectId = insn.word(3u + i);
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002769 auto & srcObject = getObject(srcObjectId);
2770 auto & srcObjectTy = getType(srcObject.type);
2771 GenericValue srcObjectAccess(this, routine, srcObjectId);
2772
2773 for (auto j = 0u; j < srcObjectTy.sizeInComponents; j++)
Ben Clayton9b62c5e2019-03-08 09:32:34 +00002774 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04002775 dst.move(offset++, srcObjectAccess.Float(j));
Ben Clayton9b62c5e2019-03-08 09:32:34 +00002776 }
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002777 }
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002778
2779 return EmitResult::Continue;
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002780 }
2781
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002782 SpirvShader::EmitResult SpirvShader::EmitCompositeInsert(InsnIterator insn, EmitState *state) const
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002783 {
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002784 auto routine = state->routine;
Ben Claytonaf973b62019-03-13 18:19:20 +00002785 Type::ID resultTypeId = insn.word(1);
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002786 auto &type = getType(resultTypeId);
2787 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2788 auto &newPartObject = getObject(insn.word(3));
2789 auto &newPartObjectTy = getType(newPartObject.type);
2790 auto firstNewComponent = WalkLiteralAccessChain(resultTypeId, insn.wordCount() - 5, insn.wordPointer(5));
2791
2792 GenericValue srcObjectAccess(this, routine, insn.word(4));
2793 GenericValue newPartObjectAccess(this, routine, insn.word(3));
2794
2795 // old components before
2796 for (auto i = 0u; i < firstNewComponent; i++)
2797 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04002798 dst.move(i, srcObjectAccess.Float(i));
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002799 }
2800 // new part
2801 for (auto i = 0u; i < newPartObjectTy.sizeInComponents; i++)
2802 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04002803 dst.move(firstNewComponent + i, newPartObjectAccess.Float(i));
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002804 }
2805 // old components after
2806 for (auto i = firstNewComponent + newPartObjectTy.sizeInComponents; i < type.sizeInComponents; i++)
2807 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04002808 dst.move(i, srcObjectAccess.Float(i));
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002809 }
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002810
2811 return EmitResult::Continue;
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002812 }
2813
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002814 SpirvShader::EmitResult SpirvShader::EmitCompositeExtract(InsnIterator insn, EmitState *state) const
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002815 {
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002816 auto routine = state->routine;
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002817 auto &type = getType(insn.word(1));
2818 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2819 auto &compositeObject = getObject(insn.word(3));
Ben Claytonaf973b62019-03-13 18:19:20 +00002820 Type::ID compositeTypeId = compositeObject.definition.word(1);
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002821 auto firstComponent = WalkLiteralAccessChain(compositeTypeId, insn.wordCount() - 4, insn.wordPointer(4));
2822
2823 GenericValue compositeObjectAccess(this, routine, insn.word(3));
2824 for (auto i = 0u; i < type.sizeInComponents; i++)
2825 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04002826 dst.move(i, compositeObjectAccess.Float(firstComponent + i));
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002827 }
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002828
2829 return EmitResult::Continue;
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002830 }
2831
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002832 SpirvShader::EmitResult SpirvShader::EmitVectorShuffle(InsnIterator insn, EmitState *state) const
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002833 {
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002834 auto routine = state->routine;
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002835 auto &type = getType(insn.word(1));
2836 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2837
Chris Forbes13eba6c2019-03-08 10:41:05 -08002838 // Note: number of components in result type, first half type, and second
2839 // half type are all independent.
2840 auto &firstHalfType = getType(getObject(insn.word(3)).type);
2841
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002842 GenericValue firstHalfAccess(this, routine, insn.word(3));
2843 GenericValue secondHalfAccess(this, routine, insn.word(4));
2844
2845 for (auto i = 0u; i < type.sizeInComponents; i++)
2846 {
2847 auto selector = insn.word(5 + i);
2848 if (selector == static_cast<uint32_t>(-1))
2849 {
2850 // Undefined value. Until we decide to do real undef values, zero is as good
2851 // a value as any
Nicolas Capens80c796b2019-03-19 21:38:44 -04002852 dst.move(i, RValue<SIMD::Float>(0.0f));
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002853 }
Chris Forbes13eba6c2019-03-08 10:41:05 -08002854 else if (selector < firstHalfType.sizeInComponents)
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002855 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04002856 dst.move(i, firstHalfAccess.Float(selector));
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002857 }
2858 else
2859 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04002860 dst.move(i, secondHalfAccess.Float(selector - firstHalfType.sizeInComponents));
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002861 }
2862 }
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002863
2864 return EmitResult::Continue;
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002865 }
2866
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002867 SpirvShader::EmitResult SpirvShader::EmitVectorExtractDynamic(InsnIterator insn, EmitState *state) const
Chris Forbesfaed9d32019-03-15 10:31:08 -07002868 {
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002869 auto routine = state->routine;
Chris Forbesfaed9d32019-03-15 10:31:08 -07002870 auto &type = getType(insn.word(1));
2871 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2872 auto &srcType = getType(getObject(insn.word(3)).type);
2873
2874 GenericValue src(this, routine, insn.word(3));
2875 GenericValue index(this, routine, insn.word(4));
2876
2877 SIMD::UInt v = SIMD::UInt(0);
2878
2879 for (auto i = 0u; i < srcType.sizeInComponents; i++)
2880 {
2881 v |= CmpEQ(index.UInt(0), SIMD::UInt(i)) & src.UInt(i);
2882 }
2883
Nicolas Capens80c796b2019-03-19 21:38:44 -04002884 dst.move(0, v);
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002885 return EmitResult::Continue;
Chris Forbesfaed9d32019-03-15 10:31:08 -07002886 }
2887
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002888 SpirvShader::EmitResult SpirvShader::EmitVectorInsertDynamic(InsnIterator insn, EmitState *state) const
Chris Forbesfaed9d32019-03-15 10:31:08 -07002889 {
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002890 auto routine = state->routine;
Chris Forbesfaed9d32019-03-15 10:31:08 -07002891 auto &type = getType(insn.word(1));
2892 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2893
2894 GenericValue src(this, routine, insn.word(3));
2895 GenericValue component(this, routine, insn.word(4));
2896 GenericValue index(this, routine, insn.word(5));
2897
2898 for (auto i = 0u; i < type.sizeInComponents; i++)
2899 {
2900 SIMD::UInt mask = CmpEQ(SIMD::UInt(i), index.UInt(0));
Nicolas Capens80c796b2019-03-19 21:38:44 -04002901 dst.move(i, (src.UInt(i) & ~mask) | (component.UInt(0) & mask));
Chris Forbesfaed9d32019-03-15 10:31:08 -07002902 }
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002903 return EmitResult::Continue;
Chris Forbesfaed9d32019-03-15 10:31:08 -07002904 }
2905
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002906 SpirvShader::EmitResult SpirvShader::EmitVectorTimesScalar(InsnIterator insn, EmitState *state) const
Chris Forbes856ebf82019-03-08 15:30:18 -08002907 {
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002908 auto routine = state->routine;
Chris Forbes856ebf82019-03-08 15:30:18 -08002909 auto &type = getType(insn.word(1));
2910 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
Ben Clayton9b62c5e2019-03-08 09:32:34 +00002911 auto lhs = GenericValue(this, routine, insn.word(3));
2912 auto rhs = GenericValue(this, routine, insn.word(4));
Chris Forbes856ebf82019-03-08 15:30:18 -08002913
2914 for (auto i = 0u; i < type.sizeInComponents; i++)
2915 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04002916 dst.move(i, lhs.Float(i) * rhs.Float(0));
Chris Forbes856ebf82019-03-08 15:30:18 -08002917 }
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002918
2919 return EmitResult::Continue;
Chris Forbes856ebf82019-03-08 15:30:18 -08002920 }
2921
Chris Forbes06f4ed72019-03-28 09:53:20 +13002922 SpirvShader::EmitResult SpirvShader::EmitMatrixTimesVector(InsnIterator insn, EmitState *state) const
2923 {
2924 auto routine = state->routine;
2925 auto &type = getType(insn.word(1));
2926 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2927 auto lhs = GenericValue(this, routine, insn.word(3));
2928 auto rhs = GenericValue(this, routine, insn.word(4));
Ben Clayton16ab9e92019-04-08 10:57:35 -04002929 auto rhsType = getType(rhs.type);
Chris Forbes06f4ed72019-03-28 09:53:20 +13002930
2931 for (auto i = 0u; i < type.sizeInComponents; i++)
2932 {
2933 SIMD::Float v = lhs.Float(i) * rhs.Float(0);
2934 for (auto j = 1u; j < rhsType.sizeInComponents; j++)
2935 {
2936 v += lhs.Float(i + type.sizeInComponents * j) * rhs.Float(j);
2937 }
2938 dst.move(i, v);
2939 }
2940
2941 return EmitResult::Continue;
2942 }
2943
Chris Forbesa563dd82019-03-28 10:32:55 +13002944 SpirvShader::EmitResult SpirvShader::EmitVectorTimesMatrix(InsnIterator insn, EmitState *state) const
2945 {
2946 auto routine = state->routine;
2947 auto &type = getType(insn.word(1));
2948 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2949 auto lhs = GenericValue(this, routine, insn.word(3));
2950 auto rhs = GenericValue(this, routine, insn.word(4));
Ben Clayton16ab9e92019-04-08 10:57:35 -04002951 auto lhsType = getType(lhs.type);
Chris Forbesa563dd82019-03-28 10:32:55 +13002952
2953 for (auto i = 0u; i < type.sizeInComponents; i++)
2954 {
2955 SIMD::Float v = lhs.Float(0) * rhs.Float(i * lhsType.sizeInComponents);
2956 for (auto j = 1u; j < lhsType.sizeInComponents; j++)
2957 {
2958 v += lhs.Float(j) * rhs.Float(i * lhsType.sizeInComponents + j);
2959 }
2960 dst.move(i, v);
2961 }
2962
2963 return EmitResult::Continue;
2964 }
2965
Chris Forbes51562f12019-03-28 19:08:39 -07002966 SpirvShader::EmitResult SpirvShader::EmitMatrixTimesMatrix(InsnIterator insn, EmitState *state) const
2967 {
2968 auto routine = state->routine;
2969 auto &type = getType(insn.word(1));
2970 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2971 auto lhs = GenericValue(this, routine, insn.word(3));
2972 auto rhs = GenericValue(this, routine, insn.word(4));
2973
2974 auto numColumns = type.definition.word(3);
2975 auto numRows = getType(type.definition.word(2)).definition.word(3);
2976 auto numAdds = getType(getObject(insn.word(3)).type).definition.word(3);
2977
2978 for (auto row = 0u; row < numRows; row++)
2979 {
2980 for (auto col = 0u; col < numColumns; col++)
2981 {
2982 SIMD::Float v = SIMD::Float(0);
2983 for (auto i = 0u; i < numAdds; i++)
2984 {
2985 v += lhs.Float(i * numRows + row) * rhs.Float(col * numAdds + i);
2986 }
2987 dst.move(numRows * col + row, v);
2988 }
2989 }
2990
2991 return EmitResult::Continue;
2992 }
2993
Ben Clayton3ee52992019-04-08 11:01:23 -04002994 SpirvShader::EmitResult SpirvShader::EmitOuterProduct(InsnIterator insn, EmitState *state) const
2995 {
2996 auto routine = state->routine;
2997 auto &type = getType(insn.word(1));
2998 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2999 auto lhs = GenericValue(this, routine, insn.word(3));
3000 auto rhs = GenericValue(this, routine, insn.word(4));
3001 auto &lhsType = getType(lhs.type);
3002 auto &rhsType = getType(rhs.type);
3003
3004 ASSERT(type.definition.opcode() == spv::OpTypeMatrix);
3005 ASSERT(lhsType.definition.opcode() == spv::OpTypeVector);
3006 ASSERT(rhsType.definition.opcode() == spv::OpTypeVector);
3007 ASSERT(getType(lhsType.element).opcode() == spv::OpTypeFloat);
3008 ASSERT(getType(rhsType.element).opcode() == spv::OpTypeFloat);
3009
3010 auto numRows = lhsType.definition.word(3);
3011 auto numCols = rhsType.definition.word(3);
3012
3013 for (auto col = 0u; col < numCols; col++)
3014 {
3015 for (auto row = 0u; row < numRows; row++)
3016 {
3017 dst.move(col * numRows + row, lhs.Float(row) * rhs.Float(col));
3018 }
3019 }
3020
3021 return EmitResult::Continue;
3022 }
3023
Ben Clayton620f7082019-04-08 11:12:08 -04003024 SpirvShader::EmitResult SpirvShader::EmitTranspose(InsnIterator insn, EmitState *state) const
3025 {
3026 auto routine = state->routine;
3027 auto &type = getType(insn.word(1));
3028 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
3029 auto mat = GenericValue(this, routine, insn.word(3));
3030
3031 auto numCols = type.definition.word(3);
3032 auto numRows = getType(type.definition.word(2)).sizeInComponents;
3033
3034 for (auto col = 0u; col < numCols; col++)
3035 {
3036 for (auto row = 0u; row < numRows; row++)
3037 {
3038 dst.move(col * numRows + row, mat.Float(row * numCols + col));
3039 }
3040 }
3041
3042 return EmitResult::Continue;
3043 }
3044
Ben Claytonc0cf68b2019-03-21 17:46:08 +00003045 SpirvShader::EmitResult SpirvShader::EmitUnaryOp(InsnIterator insn, EmitState *state) const
Ben Claytondd1e37e2019-02-28 19:59:15 +00003046 {
Ben Claytonc0cf68b2019-03-21 17:46:08 +00003047 auto routine = state->routine;
Ben Claytondd1e37e2019-02-28 19:59:15 +00003048 auto &type = getType(insn.word(1));
3049 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
3050 auto src = GenericValue(this, routine, insn.word(3));
3051
3052 for (auto i = 0u; i < type.sizeInComponents; i++)
3053 {
Ben Claytondd1e37e2019-02-28 19:59:15 +00003054 switch (insn.opcode())
3055 {
3056 case spv::OpNot:
3057 case spv::OpLogicalNot: // logical not == bitwise not due to all-bits boolean representation
Nicolas Capens80c796b2019-03-19 21:38:44 -04003058 dst.move(i, ~src.UInt(i));
Ben Claytondd1e37e2019-02-28 19:59:15 +00003059 break;
Ben Claytonb5bfa502019-04-08 14:26:36 -04003060 case spv::OpBitFieldInsert:
3061 {
3062 auto insert = GenericValue(this, routine, insn.word(4)).UInt(i);
3063 auto offset = GenericValue(this, routine, insn.word(5)).UInt(0);
3064 auto count = GenericValue(this, routine, insn.word(6)).UInt(0);
3065 auto one = SIMD::UInt(1);
3066 auto v = src.UInt(i);
3067 auto mask = Bitmask32(offset + count) ^ Bitmask32(offset);
3068 dst.move(i, (v & ~mask) | ((insert << offset) & mask));
3069 break;
3070 }
Ben Claytond86db952019-04-08 13:43:11 -04003071 case spv::OpBitFieldSExtract:
3072 case spv::OpBitFieldUExtract:
3073 {
3074 auto offset = GenericValue(this, routine, insn.word(4)).UInt(0);
3075 auto count = GenericValue(this, routine, insn.word(5)).UInt(0);
3076 auto one = SIMD::UInt(1);
3077 auto v = src.UInt(i);
3078 SIMD::UInt out = (v >> offset) & Bitmask32(count);
3079 if (insn.opcode() == spv::OpBitFieldSExtract)
3080 {
3081 auto sign = out & NthBit32(count - one);
3082 auto sext = ~(sign - one);
3083 out |= sext;
3084 }
3085 dst.move(i, out);
3086 break;
3087 }
Ben Claytond2a46432019-04-08 11:41:45 -04003088 case spv::OpBitReverse:
Ben Claytone339d6c2019-04-13 16:49:39 +00003089 {
Ben Claytondb4f3df2019-04-13 16:48:33 +00003090 // TODO: Add an intrinsic to reactor. Even if there isn't a
3091 // single vector instruction, there may be target-dependent
3092 // ways to make this faster.
3093 // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
3094 SIMD::UInt v = src.UInt(i);
3095 v = ((v >> 1) & SIMD::UInt(0x55555555)) | ((v & SIMD::UInt(0x55555555)) << 1);
3096 v = ((v >> 2) & SIMD::UInt(0x33333333)) | ((v & SIMD::UInt(0x33333333)) << 2);
3097 v = ((v >> 4) & SIMD::UInt(0x0F0F0F0F)) | ((v & SIMD::UInt(0x0F0F0F0F)) << 4);
3098 v = ((v >> 8) & SIMD::UInt(0x00FF00FF)) | ((v & SIMD::UInt(0x00FF00FF)) << 8);
3099 v = (v >> 16) | (v << 16);
3100 dst.move(i, v);
Ben Claytond2a46432019-04-08 11:41:45 -04003101 break;
Ben Claytone339d6c2019-04-13 16:49:39 +00003102 }
Ben Clayton1eb017d2019-04-08 11:32:09 -04003103 case spv::OpBitCount:
Ben Claytone339d6c2019-04-13 16:49:39 +00003104 {
3105 // TODO: Add an intrinsic to reactor. Even if there isn't a
3106 // single vector instruction, there may be target-dependent
3107 // ways to make this faster.
3108 // https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
3109 auto v = src.UInt(i);
3110 SIMD::UInt c = v - ((v >> 1) & SIMD::UInt(0x55555555));
3111 c = ((c >> 2) & SIMD::UInt(0x33333333)) + (c & SIMD::UInt(0x33333333));
3112 c = ((c >> 4) + c) & SIMD::UInt(0x0F0F0F0F);
3113 c = ((c >> 8) + c) & SIMD::UInt(0x00FF00FF);
3114 c = ((c >> 16) + c) & SIMD::UInt(0x0000FFFF);
3115 dst.move(i, c);
Ben Clayton1eb017d2019-04-08 11:32:09 -04003116 break;
Ben Claytone339d6c2019-04-13 16:49:39 +00003117 }
Ben Claytondd1e37e2019-02-28 19:59:15 +00003118 case spv::OpSNegate:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003119 dst.move(i, -src.Int(i));
Ben Claytondd1e37e2019-02-28 19:59:15 +00003120 break;
3121 case spv::OpFNegate:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003122 dst.move(i, -src.Float(i));
Ben Claytondd1e37e2019-02-28 19:59:15 +00003123 break;
Chris Forbes4d503052019-03-01 17:13:57 -08003124 case spv::OpConvertFToU:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003125 dst.move(i, SIMD::UInt(src.Float(i)));
Chris Forbes4d503052019-03-01 17:13:57 -08003126 break;
3127 case spv::OpConvertFToS:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003128 dst.move(i, SIMD::Int(src.Float(i)));
Chris Forbes4d503052019-03-01 17:13:57 -08003129 break;
3130 case spv::OpConvertSToF:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003131 dst.move(i, SIMD::Float(src.Int(i)));
Chris Forbes4d503052019-03-01 17:13:57 -08003132 break;
3133 case spv::OpConvertUToF:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003134 dst.move(i, SIMD::Float(src.UInt(i)));
Chris Forbes4d503052019-03-01 17:13:57 -08003135 break;
3136 case spv::OpBitcast:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003137 dst.move(i, src.Float(i));
Chris Forbes4d503052019-03-01 17:13:57 -08003138 break;
Chris Forbes3ed33ce2019-03-07 13:38:31 -08003139 case spv::OpIsInf:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003140 dst.move(i, IsInf(src.Float(i)));
Chris Forbes3ed33ce2019-03-07 13:38:31 -08003141 break;
3142 case spv::OpIsNan:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003143 dst.move(i, IsNan(src.Float(i)));
Chris Forbes3ed33ce2019-03-07 13:38:31 -08003144 break;
Chris Forbesaff2dd02019-03-20 14:50:24 -07003145 case spv::OpDPdx:
3146 case spv::OpDPdxCoarse:
3147 // Derivative instructions: FS invocations are laid out like so:
3148 // 0 1
3149 // 2 3
3150 static_assert(SIMD::Width == 4, "All cross-lane instructions will need care when using a different width");
Nicolas Capens80c796b2019-03-19 21:38:44 -04003151 dst.move(i, SIMD::Float(Extract(src.Float(i), 1) - Extract(src.Float(i), 0)));
Chris Forbesaff2dd02019-03-20 14:50:24 -07003152 break;
3153 case spv::OpDPdy:
3154 case spv::OpDPdyCoarse:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003155 dst.move(i, SIMD::Float(Extract(src.Float(i), 2) - Extract(src.Float(i), 0)));
Chris Forbesaff2dd02019-03-20 14:50:24 -07003156 break;
3157 case spv::OpFwidth:
3158 case spv::OpFwidthCoarse:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003159 dst.move(i, SIMD::Float(Abs(Extract(src.Float(i), 1) - Extract(src.Float(i), 0))
Chris Forbesaff2dd02019-03-20 14:50:24 -07003160 + Abs(Extract(src.Float(i), 2) - Extract(src.Float(i), 0))));
3161 break;
3162 case spv::OpDPdxFine:
3163 {
3164 auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0);
3165 auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2);
3166 SIMD::Float v = SIMD::Float(firstRow);
3167 v = Insert(v, secondRow, 2);
3168 v = Insert(v, secondRow, 3);
Nicolas Capens80c796b2019-03-19 21:38:44 -04003169 dst.move(i, v);
Chris Forbesaff2dd02019-03-20 14:50:24 -07003170 break;
3171 }
3172 case spv::OpDPdyFine:
3173 {
3174 auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0);
3175 auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1);
3176 SIMD::Float v = SIMD::Float(firstColumn);
3177 v = Insert(v, secondColumn, 1);
3178 v = Insert(v, secondColumn, 3);
Nicolas Capens80c796b2019-03-19 21:38:44 -04003179 dst.move(i, v);
Chris Forbesaff2dd02019-03-20 14:50:24 -07003180 break;
3181 }
3182 case spv::OpFwidthFine:
3183 {
3184 auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0);
3185 auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2);
3186 SIMD::Float dpdx = SIMD::Float(firstRow);
3187 dpdx = Insert(dpdx, secondRow, 2);
3188 dpdx = Insert(dpdx, secondRow, 3);
3189 auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0);
3190 auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1);
3191 SIMD::Float dpdy = SIMD::Float(firstColumn);
3192 dpdy = Insert(dpdy, secondColumn, 1);
3193 dpdy = Insert(dpdy, secondColumn, 3);
Nicolas Capens80c796b2019-03-19 21:38:44 -04003194 dst.move(i, Abs(dpdx) + Abs(dpdy));
Chris Forbesaff2dd02019-03-20 14:50:24 -07003195 break;
3196 }
Ben Clayton64da4ae2019-04-19 12:34:06 -04003197 case spv::OpQuantizeToF16:
3198 {
Chris Forbesea81ab72019-05-14 15:20:33 -07003199 // Note: keep in sync with the specialization constant version in EvalSpecConstantUnaryOp
Ben Clayton64da4ae2019-04-19 12:34:06 -04003200 auto abs = Abs(src.Float(i));
3201 auto sign = src.Int(i) & SIMD::Int(0x80000000);
Antonio Maiorano5bce1f42019-05-10 16:03:49 -04003202 auto isZero = CmpLT(abs, SIMD::Float(0.000061035f));
Ben Clayton64da4ae2019-04-19 12:34:06 -04003203 auto isInf = CmpGT(abs, SIMD::Float(65504.0f));
3204 auto isNaN = IsNan(abs);
3205 auto isInfOrNan = isInf | isNaN;
3206 SIMD::Int v = src.Int(i) & SIMD::Int(0xFFFFE000);
3207 v &= ~isZero | SIMD::Int(0x80000000);
3208 v = sign | (isInfOrNan & SIMD::Int(0x7F800000)) | (~isInfOrNan & v);
3209 v |= isNaN & SIMD::Int(0x400000);
3210 dst.move(i, v);
3211 break;
3212 }
Ben Claytondd1e37e2019-02-28 19:59:15 +00003213 default:
Ben Clayton92797c22019-04-25 10:44:03 +01003214 UNREACHABLE("%s", OpcodeName(insn.opcode()).c_str());
Ben Claytondd1e37e2019-02-28 19:59:15 +00003215 }
3216 }
Ben Claytonc0cf68b2019-03-21 17:46:08 +00003217
3218 return EmitResult::Continue;
Ben Claytondd1e37e2019-02-28 19:59:15 +00003219 }
3220
Ben Claytonc0cf68b2019-03-21 17:46:08 +00003221 SpirvShader::EmitResult SpirvShader::EmitBinaryOp(InsnIterator insn, EmitState *state) const
Ben Claytondd1e37e2019-02-28 19:59:15 +00003222 {
Ben Claytonc0cf68b2019-03-21 17:46:08 +00003223 auto routine = state->routine;
Ben Claytondd1e37e2019-02-28 19:59:15 +00003224 auto &type = getType(insn.word(1));
3225 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
Chris Forbese86b6dc2019-03-01 09:08:47 -08003226 auto &lhsType = getType(getObject(insn.word(3)).type);
Ben Clayton9b62c5e2019-03-08 09:32:34 +00003227 auto lhs = GenericValue(this, routine, insn.word(3));
3228 auto rhs = GenericValue(this, routine, insn.word(4));
Ben Claytondd1e37e2019-02-28 19:59:15 +00003229
Chris Forbese86b6dc2019-03-01 09:08:47 -08003230 for (auto i = 0u; i < lhsType.sizeInComponents; i++)
Ben Claytondd1e37e2019-02-28 19:59:15 +00003231 {
Ben Claytondd1e37e2019-02-28 19:59:15 +00003232 switch (insn.opcode())
3233 {
3234 case spv::OpIAdd:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003235 dst.move(i, lhs.Int(i) + rhs.Int(i));
Ben Claytondd1e37e2019-02-28 19:59:15 +00003236 break;
3237 case spv::OpISub:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003238 dst.move(i, lhs.Int(i) - rhs.Int(i));
Ben Claytondd1e37e2019-02-28 19:59:15 +00003239 break;
3240 case spv::OpIMul:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003241 dst.move(i, lhs.Int(i) * rhs.Int(i));
Ben Claytondd1e37e2019-02-28 19:59:15 +00003242 break;
3243 case spv::OpSDiv:
Ben Clayton49d2c132019-03-14 12:21:53 +00003244 {
Ben Claytona2749f32019-03-14 19:32:41 +00003245 SIMD::Int a = lhs.Int(i);
3246 SIMD::Int b = rhs.Int(i);
3247 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
3248 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
Nicolas Capens80c796b2019-03-19 21:38:44 -04003249 dst.move(i, a / b);
Ben Claytondd1e37e2019-02-28 19:59:15 +00003250 break;
Ben Clayton49d2c132019-03-14 12:21:53 +00003251 }
Ben Claytondd1e37e2019-02-28 19:59:15 +00003252 case spv::OpUDiv:
Ben Clayton49d2c132019-03-14 12:21:53 +00003253 {
3254 auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0)));
Nicolas Capens80c796b2019-03-19 21:38:44 -04003255 dst.move(i, lhs.UInt(i) / (rhs.UInt(i) | zeroMask));
Ben Claytondd1e37e2019-02-28 19:59:15 +00003256 break;
Ben Clayton49d2c132019-03-14 12:21:53 +00003257 }
Chris Forbes71673c82019-03-14 12:55:20 -07003258 case spv::OpSRem:
3259 {
3260 SIMD::Int a = lhs.Int(i);
3261 SIMD::Int b = rhs.Int(i);
3262 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
3263 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
Nicolas Capens80c796b2019-03-19 21:38:44 -04003264 dst.move(i, a % b);
Chris Forbes71673c82019-03-14 12:55:20 -07003265 break;
3266 }
Ben Claytonbb8c8e22019-03-08 12:04:00 +00003267 case spv::OpSMod:
3268 {
Ben Claytona2749f32019-03-14 19:32:41 +00003269 SIMD::Int a = lhs.Int(i);
3270 SIMD::Int b = rhs.Int(i);
3271 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
3272 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
3273 auto mod = a % b;
Ben Claytonbb8c8e22019-03-08 12:04:00 +00003274 // If a and b have opposite signs, the remainder operation takes
3275 // the sign from a but OpSMod is supposed to take the sign of b.
3276 // Adding b will ensure that the result has the correct sign and
3277 // that it is still congruent to a modulo b.
3278 //
3279 // See also http://mathforum.org/library/drmath/view/52343.html
3280 auto signDiff = CmpNEQ(CmpGE(a, SIMD::Int(0)), CmpGE(b, SIMD::Int(0)));
3281 auto fixedMod = mod + (b & CmpNEQ(mod, SIMD::Int(0)) & signDiff);
Nicolas Capens80c796b2019-03-19 21:38:44 -04003282 dst.move(i, As<SIMD::Float>(fixedMod));
Ben Claytonbb8c8e22019-03-08 12:04:00 +00003283 break;
3284 }
Ben Claytondd1e37e2019-02-28 19:59:15 +00003285 case spv::OpUMod:
Chris Forbes3ebf5832019-03-14 08:15:25 -07003286 {
3287 auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0)));
Nicolas Capens80c796b2019-03-19 21:38:44 -04003288 dst.move(i, lhs.UInt(i) % (rhs.UInt(i) | zeroMask));
Ben Claytondd1e37e2019-02-28 19:59:15 +00003289 break;
Chris Forbes3ebf5832019-03-14 08:15:25 -07003290 }
Ben Claytone95eeb12019-03-04 16:32:09 +00003291 case spv::OpIEqual:
Chris Forbes787b4462019-03-08 12:16:57 -08003292 case spv::OpLogicalEqual:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003293 dst.move(i, CmpEQ(lhs.Int(i), rhs.Int(i)));
Ben Claytone95eeb12019-03-04 16:32:09 +00003294 break;
3295 case spv::OpINotEqual:
Chris Forbes787b4462019-03-08 12:16:57 -08003296 case spv::OpLogicalNotEqual:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003297 dst.move(i, CmpNEQ(lhs.Int(i), rhs.Int(i)));
Ben Claytone95eeb12019-03-04 16:32:09 +00003298 break;
3299 case spv::OpUGreaterThan:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003300 dst.move(i, CmpGT(lhs.UInt(i), rhs.UInt(i)));
Ben Claytone95eeb12019-03-04 16:32:09 +00003301 break;
3302 case spv::OpSGreaterThan:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003303 dst.move(i, CmpGT(lhs.Int(i), rhs.Int(i)));
Ben Claytone95eeb12019-03-04 16:32:09 +00003304 break;
3305 case spv::OpUGreaterThanEqual:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003306 dst.move(i, CmpGE(lhs.UInt(i), rhs.UInt(i)));
Ben Claytone95eeb12019-03-04 16:32:09 +00003307 break;
3308 case spv::OpSGreaterThanEqual:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003309 dst.move(i, CmpGE(lhs.Int(i), rhs.Int(i)));
Ben Claytone95eeb12019-03-04 16:32:09 +00003310 break;
3311 case spv::OpULessThan:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003312 dst.move(i, CmpLT(lhs.UInt(i), rhs.UInt(i)));
Ben Claytone95eeb12019-03-04 16:32:09 +00003313 break;
3314 case spv::OpSLessThan:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003315 dst.move(i, CmpLT(lhs.Int(i), rhs.Int(i)));
Ben Claytone95eeb12019-03-04 16:32:09 +00003316 break;
3317 case spv::OpULessThanEqual:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003318 dst.move(i, CmpLE(lhs.UInt(i), rhs.UInt(i)));
Ben Claytone95eeb12019-03-04 16:32:09 +00003319 break;
3320 case spv::OpSLessThanEqual:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003321 dst.move(i, CmpLE(lhs.Int(i), rhs.Int(i)));
Ben Claytone95eeb12019-03-04 16:32:09 +00003322 break;
Ben Claytondd1e37e2019-02-28 19:59:15 +00003323 case spv::OpFAdd:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003324 dst.move(i, lhs.Float(i) + rhs.Float(i));
Ben Claytondd1e37e2019-02-28 19:59:15 +00003325 break;
3326 case spv::OpFSub:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003327 dst.move(i, lhs.Float(i) - rhs.Float(i));
Ben Claytondd1e37e2019-02-28 19:59:15 +00003328 break;
Chris Forbes9d931532019-03-08 09:53:03 -08003329 case spv::OpFMul:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003330 dst.move(i, lhs.Float(i) * rhs.Float(i));
Chris Forbes9d931532019-03-08 09:53:03 -08003331 break;
Ben Claytondd1e37e2019-02-28 19:59:15 +00003332 case spv::OpFDiv:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003333 dst.move(i, lhs.Float(i) / rhs.Float(i));
Ben Claytondd1e37e2019-02-28 19:59:15 +00003334 break;
Chris Forbes0e4d6ff2019-03-15 13:43:36 -07003335 case spv::OpFMod:
3336 // TODO(b/126873455): inaccurate for values greater than 2^24
Nicolas Capens80c796b2019-03-19 21:38:44 -04003337 dst.move(i, lhs.Float(i) - rhs.Float(i) * Floor(lhs.Float(i) / rhs.Float(i)));
Chris Forbes0e4d6ff2019-03-15 13:43:36 -07003338 break;
Chris Forbes1a4c7122019-03-15 14:50:47 -07003339 case spv::OpFRem:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003340 dst.move(i, lhs.Float(i) % rhs.Float(i));
Chris Forbes1a4c7122019-03-15 14:50:47 -07003341 break;
Ben Claytonec1aeb82019-03-04 19:33:27 +00003342 case spv::OpFOrdEqual:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003343 dst.move(i, CmpEQ(lhs.Float(i), rhs.Float(i)));
Ben Claytonec1aeb82019-03-04 19:33:27 +00003344 break;
3345 case spv::OpFUnordEqual:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003346 dst.move(i, CmpUEQ(lhs.Float(i), rhs.Float(i)));
Ben Claytonec1aeb82019-03-04 19:33:27 +00003347 break;
3348 case spv::OpFOrdNotEqual:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003349 dst.move(i, CmpNEQ(lhs.Float(i), rhs.Float(i)));
Ben Claytonec1aeb82019-03-04 19:33:27 +00003350 break;
3351 case spv::OpFUnordNotEqual:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003352 dst.move(i, CmpUNEQ(lhs.Float(i), rhs.Float(i)));
Ben Claytonec1aeb82019-03-04 19:33:27 +00003353 break;
3354 case spv::OpFOrdLessThan:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003355 dst.move(i, CmpLT(lhs.Float(i), rhs.Float(i)));
Ben Claytonec1aeb82019-03-04 19:33:27 +00003356 break;
3357 case spv::OpFUnordLessThan:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003358 dst.move(i, CmpULT(lhs.Float(i), rhs.Float(i)));
Ben Claytonec1aeb82019-03-04 19:33:27 +00003359 break;
3360 case spv::OpFOrdGreaterThan:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003361 dst.move(i, CmpGT(lhs.Float(i), rhs.Float(i)));
Ben Claytonec1aeb82019-03-04 19:33:27 +00003362 break;
3363 case spv::OpFUnordGreaterThan:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003364 dst.move(i, CmpUGT(lhs.Float(i), rhs.Float(i)));
Ben Claytonec1aeb82019-03-04 19:33:27 +00003365 break;
3366 case spv::OpFOrdLessThanEqual:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003367 dst.move(i, CmpLE(lhs.Float(i), rhs.Float(i)));
Ben Claytonec1aeb82019-03-04 19:33:27 +00003368 break;
3369 case spv::OpFUnordLessThanEqual:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003370 dst.move(i, CmpULE(lhs.Float(i), rhs.Float(i)));
Ben Claytonec1aeb82019-03-04 19:33:27 +00003371 break;
3372 case spv::OpFOrdGreaterThanEqual:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003373 dst.move(i, CmpGE(lhs.Float(i), rhs.Float(i)));
Ben Claytonec1aeb82019-03-04 19:33:27 +00003374 break;
3375 case spv::OpFUnordGreaterThanEqual:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003376 dst.move(i, CmpUGE(lhs.Float(i), rhs.Float(i)));
Ben Claytonec1aeb82019-03-04 19:33:27 +00003377 break;
Ben Claytondd1e37e2019-02-28 19:59:15 +00003378 case spv::OpShiftRightLogical:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003379 dst.move(i, lhs.UInt(i) >> rhs.UInt(i));
Ben Claytondd1e37e2019-02-28 19:59:15 +00003380 break;
3381 case spv::OpShiftRightArithmetic:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003382 dst.move(i, lhs.Int(i) >> rhs.Int(i));
Ben Claytondd1e37e2019-02-28 19:59:15 +00003383 break;
3384 case spv::OpShiftLeftLogical:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003385 dst.move(i, lhs.UInt(i) << rhs.UInt(i));
Ben Claytondd1e37e2019-02-28 19:59:15 +00003386 break;
3387 case spv::OpBitwiseOr:
3388 case spv::OpLogicalOr:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003389 dst.move(i, lhs.UInt(i) | rhs.UInt(i));
Ben Claytondd1e37e2019-02-28 19:59:15 +00003390 break;
3391 case spv::OpBitwiseXor:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003392 dst.move(i, lhs.UInt(i) ^ rhs.UInt(i));
Ben Claytondd1e37e2019-02-28 19:59:15 +00003393 break;
3394 case spv::OpBitwiseAnd:
3395 case spv::OpLogicalAnd:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003396 dst.move(i, lhs.UInt(i) & rhs.UInt(i));
Ben Claytondd1e37e2019-02-28 19:59:15 +00003397 break;
Chris Forbese86b6dc2019-03-01 09:08:47 -08003398 case spv::OpSMulExtended:
3399 // Extended ops: result is a structure containing two members of the same type as lhs & rhs.
3400 // In our flat view then, component i is the i'th component of the first member;
3401 // component i + N is the i'th component of the second member.
Nicolas Capens80c796b2019-03-19 21:38:44 -04003402 dst.move(i, lhs.Int(i) * rhs.Int(i));
3403 dst.move(i + lhsType.sizeInComponents, MulHigh(lhs.Int(i), rhs.Int(i)));
Chris Forbese86b6dc2019-03-01 09:08:47 -08003404 break;
3405 case spv::OpUMulExtended:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003406 dst.move(i, lhs.UInt(i) * rhs.UInt(i));
3407 dst.move(i + lhsType.sizeInComponents, MulHigh(lhs.UInt(i), rhs.UInt(i)));
Chris Forbese86b6dc2019-03-01 09:08:47 -08003408 break;
Chris Forbes3e6f60b2019-05-08 17:28:10 -07003409 case spv::OpIAddCarry:
3410 dst.move(i, lhs.UInt(i) + rhs.UInt(i));
3411 dst.move(i + lhsType.sizeInComponents, CmpLT(dst.UInt(i), lhs.UInt(i)) >> 31);
3412 break;
3413 case spv::OpISubBorrow:
3414 dst.move(i, lhs.UInt(i) - rhs.UInt(i));
3415 dst.move(i + lhsType.sizeInComponents, CmpLT(lhs.UInt(i), rhs.UInt(i)) >> 31);
3416 break;
Ben Claytondd1e37e2019-02-28 19:59:15 +00003417 default:
Ben Clayton92797c22019-04-25 10:44:03 +01003418 UNREACHABLE("%s", OpcodeName(insn.opcode()).c_str());
Ben Claytondd1e37e2019-02-28 19:59:15 +00003419 }
3420 }
Ben Claytonc0cf68b2019-03-21 17:46:08 +00003421
3422 return EmitResult::Continue;
Ben Claytondd1e37e2019-02-28 19:59:15 +00003423 }
3424
Ben Claytonc0cf68b2019-03-21 17:46:08 +00003425 SpirvShader::EmitResult SpirvShader::EmitDot(InsnIterator insn, EmitState *state) const
Chris Forbes2b287cc2019-03-01 13:24:17 -08003426 {
Ben Claytonc0cf68b2019-03-21 17:46:08 +00003427 auto routine = state->routine;
Chris Forbes2b287cc2019-03-01 13:24:17 -08003428 auto &type = getType(insn.word(1));
Ben Claytonaf26cfe2019-03-21 17:32:44 +00003429 ASSERT(type.sizeInComponents == 1);
Chris Forbes2b287cc2019-03-01 13:24:17 -08003430 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
3431 auto &lhsType = getType(getObject(insn.word(3)).type);
Ben Clayton9b62c5e2019-03-08 09:32:34 +00003432 auto lhs = GenericValue(this, routine, insn.word(3));
3433 auto rhs = GenericValue(this, routine, insn.word(4));
Chris Forbes2b287cc2019-03-01 13:24:17 -08003434
Nicolas Capens80c796b2019-03-19 21:38:44 -04003435 dst.move(0, Dot(lhsType.sizeInComponents, lhs, rhs));
Ben Claytonc0cf68b2019-03-21 17:46:08 +00003436 return EmitResult::Continue;
Chris Forbes2b287cc2019-03-01 13:24:17 -08003437 }
3438
Ben Claytonc0cf68b2019-03-21 17:46:08 +00003439 SpirvShader::EmitResult SpirvShader::EmitSelect(InsnIterator insn, EmitState *state) const
Ben Claytonbf943f62019-03-05 12:57:39 +00003440 {
Ben Claytonc0cf68b2019-03-21 17:46:08 +00003441 auto routine = state->routine;
Ben Claytonbf943f62019-03-05 12:57:39 +00003442 auto &type = getType(insn.word(1));
3443 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
Ben Clayton9b62c5e2019-03-08 09:32:34 +00003444 auto cond = GenericValue(this, routine, insn.word(3));
3445 auto lhs = GenericValue(this, routine, insn.word(4));
3446 auto rhs = GenericValue(this, routine, insn.word(5));
Ben Claytonbf943f62019-03-05 12:57:39 +00003447
3448 for (auto i = 0u; i < type.sizeInComponents; i++)
3449 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003450 dst.move(i, (cond.Int(i) & lhs.Int(i)) | (~cond.Int(i) & rhs.Int(i))); // FIXME: IfThenElse()
Ben Claytonbf943f62019-03-05 12:57:39 +00003451 }
Ben Claytonc0cf68b2019-03-21 17:46:08 +00003452
3453 return EmitResult::Continue;
Ben Claytonbf943f62019-03-05 12:57:39 +00003454 }
3455
Ben Claytonc0cf68b2019-03-21 17:46:08 +00003456 SpirvShader::EmitResult SpirvShader::EmitExtendedInstruction(InsnIterator insn, EmitState *state) const
Chris Forbes9667a5b2019-03-07 09:26:48 -08003457 {
Ben Claytonc0cf68b2019-03-21 17:46:08 +00003458 auto routine = state->routine;
Chris Forbes9667a5b2019-03-07 09:26:48 -08003459 auto &type = getType(insn.word(1));
3460 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
3461 auto extInstIndex = static_cast<GLSLstd450>(insn.word(4));
3462
3463 switch (extInstIndex)
3464 {
3465 case GLSLstd450FAbs:
3466 {
3467 auto src = GenericValue(this, routine, insn.word(5));
3468 for (auto i = 0u; i < type.sizeInComponents; i++)
3469 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003470 dst.move(i, Abs(src.Float(i)));
Chris Forbes9667a5b2019-03-07 09:26:48 -08003471 }
3472 break;
3473 }
3474 case GLSLstd450SAbs:
3475 {
3476 auto src = GenericValue(this, routine, insn.word(5));
3477 for (auto i = 0u; i < type.sizeInComponents; i++)
3478 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003479 dst.move(i, Abs(src.Int(i)));
Chris Forbes9667a5b2019-03-07 09:26:48 -08003480 }
3481 break;
3482 }
Chris Forbes15dff362019-03-08 11:31:31 -08003483 case GLSLstd450Cross:
3484 {
3485 auto lhs = GenericValue(this, routine, insn.word(5));
3486 auto rhs = GenericValue(this, routine, insn.word(6));
Nicolas Capens80c796b2019-03-19 21:38:44 -04003487 dst.move(0, lhs.Float(1) * rhs.Float(2) - rhs.Float(1) * lhs.Float(2));
3488 dst.move(1, lhs.Float(2) * rhs.Float(0) - rhs.Float(2) * lhs.Float(0));
3489 dst.move(2, lhs.Float(0) * rhs.Float(1) - rhs.Float(0) * lhs.Float(1));
Chris Forbes15dff362019-03-08 11:31:31 -08003490 break;
3491 }
Chris Forbesc212bbd2019-03-08 12:02:27 -08003492 case GLSLstd450Floor:
3493 {
3494 auto src = GenericValue(this, routine, insn.word(5));
3495 for (auto i = 0u; i < type.sizeInComponents; i++)
3496 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003497 dst.move(i, Floor(src.Float(i)));
Chris Forbesc212bbd2019-03-08 12:02:27 -08003498 }
3499 break;
3500 }
Chris Forbesdd172cc2019-03-08 13:36:40 -08003501 case GLSLstd450Trunc:
3502 {
3503 auto src = GenericValue(this, routine, insn.word(5));
3504 for (auto i = 0u; i < type.sizeInComponents; i++)
3505 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003506 dst.move(i, Trunc(src.Float(i)));
Chris Forbesdd172cc2019-03-08 13:36:40 -08003507 }
3508 break;
3509 }
3510 case GLSLstd450Ceil:
3511 {
3512 auto src = GenericValue(this, routine, insn.word(5));
3513 for (auto i = 0u; i < type.sizeInComponents; i++)
3514 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003515 dst.move(i, Ceil(src.Float(i)));
Chris Forbesdd172cc2019-03-08 13:36:40 -08003516 }
3517 break;
3518 }
3519 case GLSLstd450Fract:
3520 {
3521 auto src = GenericValue(this, routine, insn.word(5));
3522 for (auto i = 0u; i < type.sizeInComponents; i++)
3523 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003524 dst.move(i, Frac(src.Float(i)));
Chris Forbesdd172cc2019-03-08 13:36:40 -08003525 }
3526 break;
3527 }
3528 case GLSLstd450Round:
3529 {
3530 auto src = GenericValue(this, routine, insn.word(5));
3531 for (auto i = 0u; i < type.sizeInComponents; i++)
3532 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003533 dst.move(i, Round(src.Float(i)));
Chris Forbesdd172cc2019-03-08 13:36:40 -08003534 }
3535 break;
3536 }
3537 case GLSLstd450RoundEven:
3538 {
3539 auto src = GenericValue(this, routine, insn.word(5));
3540 for (auto i = 0u; i < type.sizeInComponents; i++)
3541 {
Ben Clayton9b62c5e2019-03-08 09:32:34 +00003542 auto x = Round(src.Float(i));
Chris Forbesdd172cc2019-03-08 13:36:40 -08003543 // dst = round(src) + ((round(src) < src) * 2 - 1) * (fract(src) == 0.5) * isOdd(round(src));
Nicolas Capens80c796b2019-03-19 21:38:44 -04003544 dst.move(i, x + ((SIMD::Float(CmpLT(x, src.Float(i)) & SIMD::Int(1)) * SIMD::Float(2.0f)) - SIMD::Float(1.0f)) *
Ben Clayton9b62c5e2019-03-08 09:32:34 +00003545 SIMD::Float(CmpEQ(Frac(src.Float(i)), SIMD::Float(0.5f)) & SIMD::Int(1)) * SIMD::Float(Int4(x) & SIMD::Int(1)));
Chris Forbesdd172cc2019-03-08 13:36:40 -08003546 }
3547 break;
3548 }
Chris Forbesdb170772019-03-08 14:50:44 -08003549 case GLSLstd450FMin:
3550 {
3551 auto lhs = GenericValue(this, routine, insn.word(5));
3552 auto rhs = GenericValue(this, routine, insn.word(6));
3553 for (auto i = 0u; i < type.sizeInComponents; i++)
3554 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003555 dst.move(i, Min(lhs.Float(i), rhs.Float(i)));
Chris Forbesdb170772019-03-08 14:50:44 -08003556 }
3557 break;
3558 }
3559 case GLSLstd450FMax:
3560 {
3561 auto lhs = GenericValue(this, routine, insn.word(5));
3562 auto rhs = GenericValue(this, routine, insn.word(6));
3563 for (auto i = 0u; i < type.sizeInComponents; i++)
3564 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003565 dst.move(i, Max(lhs.Float(i), rhs.Float(i)));
Chris Forbesdb170772019-03-08 14:50:44 -08003566 }
3567 break;
3568 }
3569 case GLSLstd450SMin:
3570 {
3571 auto lhs = GenericValue(this, routine, insn.word(5));
3572 auto rhs = GenericValue(this, routine, insn.word(6));
3573 for (auto i = 0u; i < type.sizeInComponents; i++)
3574 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003575 dst.move(i, Min(lhs.Int(i), rhs.Int(i)));
Chris Forbesdb170772019-03-08 14:50:44 -08003576 }
3577 break;
3578 }
3579 case GLSLstd450SMax:
3580 {
3581 auto lhs = GenericValue(this, routine, insn.word(5));
3582 auto rhs = GenericValue(this, routine, insn.word(6));
3583 for (auto i = 0u; i < type.sizeInComponents; i++)
3584 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003585 dst.move(i, Max(lhs.Int(i), rhs.Int(i)));
Chris Forbesdb170772019-03-08 14:50:44 -08003586 }
3587 break;
3588 }
3589 case GLSLstd450UMin:
3590 {
3591 auto lhs = GenericValue(this, routine, insn.word(5));
3592 auto rhs = GenericValue(this, routine, insn.word(6));
3593 for (auto i = 0u; i < type.sizeInComponents; i++)
3594 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003595 dst.move(i, Min(lhs.UInt(i), rhs.UInt(i)));
Chris Forbesdb170772019-03-08 14:50:44 -08003596 }
3597 break;
3598 }
3599 case GLSLstd450UMax:
3600 {
3601 auto lhs = GenericValue(this, routine, insn.word(5));
3602 auto rhs = GenericValue(this, routine, insn.word(6));
3603 for (auto i = 0u; i < type.sizeInComponents; i++)
3604 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003605 dst.move(i, Max(lhs.UInt(i), rhs.UInt(i)));
Chris Forbesdb170772019-03-08 14:50:44 -08003606 }
3607 break;
3608 }
3609 case GLSLstd450Step:
3610 {
3611 auto edge = GenericValue(this, routine, insn.word(5));
3612 auto x = GenericValue(this, routine, insn.word(6));
3613 for (auto i = 0u; i < type.sizeInComponents; i++)
3614 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003615 dst.move(i, CmpNLT(x.Float(i), edge.Float(i)) & As<SIMD::Int>(SIMD::Float(1.0f)));
Chris Forbesdb170772019-03-08 14:50:44 -08003616 }
3617 break;
3618 }
3619 case GLSLstd450SmoothStep:
3620 {
3621 auto edge0 = GenericValue(this, routine, insn.word(5));
3622 auto edge1 = GenericValue(this, routine, insn.word(6));
3623 auto x = GenericValue(this, routine, insn.word(7));
3624 for (auto i = 0u; i < type.sizeInComponents; i++)
3625 {
Ben Clayton9b62c5e2019-03-08 09:32:34 +00003626 auto tx = Min(Max((x.Float(i) - edge0.Float(i)) /
3627 (edge1.Float(i) - edge0.Float(i)), SIMD::Float(0.0f)), SIMD::Float(1.0f));
Nicolas Capens80c796b2019-03-19 21:38:44 -04003628 dst.move(i, tx * tx * (Float4(3.0f) - Float4(2.0f) * tx));
Chris Forbesdb170772019-03-08 14:50:44 -08003629 }
3630 break;
3631 }
3632 case GLSLstd450FMix:
3633 {
3634 auto x = GenericValue(this, routine, insn.word(5));
3635 auto y = GenericValue(this, routine, insn.word(6));
3636 auto a = GenericValue(this, routine, insn.word(7));
3637 for (auto i = 0u; i < type.sizeInComponents; i++)
3638 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003639 dst.move(i, a.Float(i) * (y.Float(i) - x.Float(i)) + x.Float(i));
Chris Forbesdb170772019-03-08 14:50:44 -08003640 }
3641 break;
3642 }
3643 case GLSLstd450FClamp:
3644 {
3645 auto x = GenericValue(this, routine, insn.word(5));
3646 auto minVal = GenericValue(this, routine, insn.word(6));
3647 auto maxVal = GenericValue(this, routine, insn.word(7));
3648 for (auto i = 0u; i < type.sizeInComponents; i++)
3649 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003650 dst.move(i, Min(Max(x.Float(i), minVal.Float(i)), maxVal.Float(i)));
Chris Forbesdb170772019-03-08 14:50:44 -08003651 }
3652 break;
3653 }
3654 case GLSLstd450SClamp:
3655 {
3656 auto x = GenericValue(this, routine, insn.word(5));
3657 auto minVal = GenericValue(this, routine, insn.word(6));
3658 auto maxVal = GenericValue(this, routine, insn.word(7));
3659 for (auto i = 0u; i < type.sizeInComponents; i++)
3660 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003661 dst.move(i, Min(Max(x.Int(i), minVal.Int(i)), maxVal.Int(i)));
Chris Forbesdb170772019-03-08 14:50:44 -08003662 }
3663 break;
3664 }
3665 case GLSLstd450UClamp:
3666 {
3667 auto x = GenericValue(this, routine, insn.word(5));
3668 auto minVal = GenericValue(this, routine, insn.word(6));
3669 auto maxVal = GenericValue(this, routine, insn.word(7));
3670 for (auto i = 0u; i < type.sizeInComponents; i++)
3671 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003672 dst.move(i, Min(Max(x.UInt(i), minVal.UInt(i)), maxVal.UInt(i)));
Chris Forbesdb170772019-03-08 14:50:44 -08003673 }
3674 break;
3675 }
3676 case GLSLstd450FSign:
3677 {
3678 auto src = GenericValue(this, routine, insn.word(5));
3679 for (auto i = 0u; i < type.sizeInComponents; i++)
3680 {
Ben Clayton9b62c5e2019-03-08 09:32:34 +00003681 auto neg = As<SIMD::Int>(CmpLT(src.Float(i), SIMD::Float(-0.0f))) & As<SIMD::Int>(SIMD::Float(-1.0f));
3682 auto pos = As<SIMD::Int>(CmpNLE(src.Float(i), SIMD::Float(+0.0f))) & As<SIMD::Int>(SIMD::Float(1.0f));
Nicolas Capens80c796b2019-03-19 21:38:44 -04003683 dst.move(i, neg | pos);
Chris Forbesdb170772019-03-08 14:50:44 -08003684 }
3685 break;
3686 }
3687 case GLSLstd450SSign:
3688 {
3689 auto src = GenericValue(this, routine, insn.word(5));
3690 for (auto i = 0u; i < type.sizeInComponents; i++)
3691 {
Ben Clayton9b62c5e2019-03-08 09:32:34 +00003692 auto neg = CmpLT(src.Int(i), SIMD::Int(0)) & SIMD::Int(-1);
3693 auto pos = CmpNLE(src.Int(i), SIMD::Int(0)) & SIMD::Int(1);
Nicolas Capens80c796b2019-03-19 21:38:44 -04003694 dst.move(i, neg | pos);
Chris Forbesdb170772019-03-08 14:50:44 -08003695 }
3696 break;
3697 }
Chris Forbes868ed902019-03-13 17:39:45 -07003698 case GLSLstd450Reflect:
3699 {
3700 auto I = GenericValue(this, routine, insn.word(5));
3701 auto N = GenericValue(this, routine, insn.word(6));
3702
3703 SIMD::Float d = Dot(type.sizeInComponents, I, N);
3704
3705 for (auto i = 0u; i < type.sizeInComponents; i++)
3706 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003707 dst.move(i, I.Float(i) - SIMD::Float(2.0f) * d * N.Float(i));
Chris Forbes868ed902019-03-13 17:39:45 -07003708 }
3709 break;
3710 }
3711 case GLSLstd450Refract:
3712 {
3713 auto I = GenericValue(this, routine, insn.word(5));
3714 auto N = GenericValue(this, routine, insn.word(6));
3715 auto eta = GenericValue(this, routine, insn.word(7));
3716
3717 SIMD::Float d = Dot(type.sizeInComponents, I, N);
Ben Clayton9b62c5e2019-03-08 09:32:34 +00003718 SIMD::Float k = SIMD::Float(1.0f) - eta.Float(0) * eta.Float(0) * (SIMD::Float(1.0f) - d * d);
Chris Forbes868ed902019-03-13 17:39:45 -07003719 SIMD::Int pos = CmpNLT(k, SIMD::Float(0.0f));
Ben Clayton9b62c5e2019-03-08 09:32:34 +00003720 SIMD::Float t = (eta.Float(0) * d + Sqrt(k));
Chris Forbes868ed902019-03-13 17:39:45 -07003721
3722 for (auto i = 0u; i < type.sizeInComponents; i++)
3723 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003724 dst.move(i, pos & As<SIMD::Int>(eta.Float(0) * I.Float(i) - t * N.Float(i)));
Chris Forbes868ed902019-03-13 17:39:45 -07003725 }
3726 break;
3727 }
3728 case GLSLstd450FaceForward:
3729 {
3730 auto N = GenericValue(this, routine, insn.word(5));
3731 auto I = GenericValue(this, routine, insn.word(6));
3732 auto Nref = GenericValue(this, routine, insn.word(7));
3733
3734 SIMD::Float d = Dot(type.sizeInComponents, I, Nref);
3735 SIMD::Int neg = CmpLT(d, SIMD::Float(0.0f));
3736
3737 for (auto i = 0u; i < type.sizeInComponents; i++)
3738 {
Ben Clayton9b62c5e2019-03-08 09:32:34 +00003739 auto n = N.Float(i);
Nicolas Capens80c796b2019-03-19 21:38:44 -04003740 dst.move(i, (neg & As<SIMD::Int>(n)) | (~neg & As<SIMD::Int>(-n)));
Chris Forbes868ed902019-03-13 17:39:45 -07003741 }
3742 break;
3743 }
3744 case GLSLstd450Length:
3745 {
3746 auto x = GenericValue(this, routine, insn.word(5));
3747 SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x);
3748
Nicolas Capens80c796b2019-03-19 21:38:44 -04003749 dst.move(0, Sqrt(d));
Chris Forbes868ed902019-03-13 17:39:45 -07003750 break;
3751 }
3752 case GLSLstd450Normalize:
3753 {
3754 auto x = GenericValue(this, routine, insn.word(5));
3755 SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x);
3756 SIMD::Float invLength = SIMD::Float(1.0f) / Sqrt(d);
3757
3758 for (auto i = 0u; i < type.sizeInComponents; i++)
3759 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003760 dst.move(i, invLength * x.Float(i));
Chris Forbes868ed902019-03-13 17:39:45 -07003761 }
3762 break;
3763 }
3764 case GLSLstd450Distance:
3765 {
3766 auto p0 = GenericValue(this, routine, insn.word(5));
3767 auto p1 = GenericValue(this, routine, insn.word(6));
Ben Clayton2cbf4f92019-04-08 16:19:30 -04003768 auto p0Type = getType(p0.type);
Chris Forbes868ed902019-03-13 17:39:45 -07003769
3770 // sqrt(dot(p0-p1, p0-p1))
Ben Clayton9b62c5e2019-03-08 09:32:34 +00003771 SIMD::Float d = (p0.Float(0) - p1.Float(0)) * (p0.Float(0) - p1.Float(0));
Chris Forbes868ed902019-03-13 17:39:45 -07003772
3773 for (auto i = 1u; i < p0Type.sizeInComponents; i++)
3774 {
Ben Clayton9b62c5e2019-03-08 09:32:34 +00003775 d += (p0.Float(i) - p1.Float(i)) * (p0.Float(i) - p1.Float(i));
Chris Forbes868ed902019-03-13 17:39:45 -07003776 }
3777
Nicolas Capens80c796b2019-03-19 21:38:44 -04003778 dst.move(0, Sqrt(d));
Chris Forbes868ed902019-03-13 17:39:45 -07003779 break;
3780 }
Ben Clayton2cbf4f92019-04-08 16:19:30 -04003781 case GLSLstd450Modf:
3782 {
3783 auto val = GenericValue(this, routine, insn.word(5));
3784 auto ptrId = Object::ID(insn.word(6));
3785 auto ptrTy = getType(getObject(ptrId).type);
3786 auto ptr = GetPointerToData(ptrId, 0, routine);
3787 bool interleavedByLane = IsStorageInterleavedByLane(ptrTy.storageClass);
3788
3789 for (auto i = 0u; i < type.sizeInComponents; i++)
3790 {
3791 auto whole = Floor(val.Float(i));
3792 auto frac = Frac(val.Float(i));
3793
3794 dst.move(i, frac);
3795
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04003796 auto p = ptr + (i * sizeof(float));
3797 if (interleavedByLane) { p = interleaveByLane(p); }
3798 SIMD::Store(p, whole, state->activeLaneMask());
Ben Clayton2cbf4f92019-04-08 16:19:30 -04003799 }
3800 break;
3801 }
Chris Forbesfde26fe2019-04-08 14:56:52 -07003802 case GLSLstd450ModfStruct:
3803 {
3804 auto val = GenericValue(this, routine, insn.word(5));
3805 auto valTy = getType(val.type);
3806
3807 for (auto i = 0u; i < valTy.sizeInComponents; i++)
3808 {
3809 auto whole = Floor(val.Float(i));
3810 auto frac = Frac(val.Float(i));
3811
3812 dst.move(i, frac);
3813 dst.move(i + valTy.sizeInComponents, whole);
3814 }
3815 break;
3816 }
Chris Forbes50e64932019-04-08 17:49:27 -07003817 case GLSLstd450PackSnorm4x8:
3818 {
3819 auto val = GenericValue(this, routine, insn.word(5));
3820 dst.move(0, (SIMD::Int(Round(Min(Max(val.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
3821 SIMD::Int(0xFF)) |
3822 ((SIMD::Int(Round(Min(Max(val.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
3823 SIMD::Int(0xFF)) << 8) |
3824 ((SIMD::Int(Round(Min(Max(val.Float(2), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
3825 SIMD::Int(0xFF)) << 16) |
3826 ((SIMD::Int(Round(Min(Max(val.Float(3), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
3827 SIMD::Int(0xFF)) << 24));
3828 break;
3829 }
3830 case GLSLstd450PackUnorm4x8:
3831 {
3832 auto val = GenericValue(this, routine, insn.word(5));
3833 dst.move(0, (SIMD::UInt(Round(Min(Max(val.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
3834 ((SIMD::UInt(Round(Min(Max(val.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
3835 ((SIMD::UInt(Round(Min(Max(val.Float(2), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
3836 ((SIMD::UInt(Round(Min(Max(val.Float(3), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24));
3837 break;
3838 }
3839 case GLSLstd450PackSnorm2x16:
3840 {
3841 auto val = GenericValue(this, routine, insn.word(5));
3842 dst.move(0, (SIMD::Int(Round(Min(Max(val.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(32767.0f))) &
3843 SIMD::Int(0xFFFF)) |
3844 ((SIMD::Int(Round(Min(Max(val.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(32767.0f))) &
3845 SIMD::Int(0xFFFF)) << 16));
3846 break;
3847 }
3848 case GLSLstd450PackUnorm2x16:
3849 {
3850 auto val = GenericValue(this, routine, insn.word(5));
3851 dst.move(0, (SIMD::UInt(Round(Min(Max(val.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(65535.0f))) &
3852 SIMD::UInt(0xFFFF)) |
3853 ((SIMD::UInt(Round(Min(Max(val.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(65535.0f))) &
3854 SIMD::UInt(0xFFFF)) << 16));
3855 break;
3856 }
3857 case GLSLstd450PackHalf2x16:
3858 {
3859 auto val = GenericValue(this, routine, insn.word(5));
3860 dst.move(0, FloatToHalfBits(val.UInt(0), false) | FloatToHalfBits(val.UInt(1), true));
3861 break;
3862 }
3863 case GLSLstd450UnpackSnorm4x8:
3864 {
3865 auto val = GenericValue(this, routine, insn.word(5));
3866 dst.move(0, Min(Max(SIMD::Float(((val.Int(0)<<24) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
3867 dst.move(1, Min(Max(SIMD::Float(((val.Int(0)<<16) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
3868 dst.move(2, Min(Max(SIMD::Float(((val.Int(0)<<8) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
3869 dst.move(3, Min(Max(SIMD::Float(((val.Int(0)) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
3870 break;
3871 }
3872 case GLSLstd450UnpackUnorm4x8:
3873 {
3874 auto val = GenericValue(this, routine, insn.word(5));
3875 dst.move(0, SIMD::Float((val.UInt(0) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
3876 dst.move(1, SIMD::Float(((val.UInt(0)>>8) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
3877 dst.move(2, SIMD::Float(((val.UInt(0)>>16) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
3878 dst.move(3, SIMD::Float(((val.UInt(0)>>24) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
3879 break;
3880 }
3881 case GLSLstd450UnpackSnorm2x16:
3882 {
3883 auto val = GenericValue(this, routine, insn.word(5));
3884 // clamp(f / 32767.0, -1.0, 1.0)
3885 dst.move(0, Min(Max(SIMD::Float(As<SIMD::Int>((val.UInt(0) & SIMD::UInt(0x0000FFFF)) << 16)) *
3886 SIMD::Float(1.0f / float(0x7FFF0000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
3887 dst.move(1, Min(Max(SIMD::Float(As<SIMD::Int>(val.UInt(0) & SIMD::UInt(0xFFFF0000))) * SIMD::Float(1.0f / float(0x7FFF0000)),
3888 SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
3889 break;
3890 }
3891 case GLSLstd450UnpackUnorm2x16:
3892 {
3893 auto val = GenericValue(this, routine, insn.word(5));
3894 // f / 65535.0
3895 dst.move(0, SIMD::Float((val.UInt(0) & SIMD::UInt(0x0000FFFF)) << 16) * SIMD::Float(1.0f / float(0xFFFF0000)));
3896 dst.move(1, SIMD::Float(val.UInt(0) & SIMD::UInt(0xFFFF0000)) * SIMD::Float(1.0f / float(0xFFFF0000)));
3897 break;
3898 }
3899 case GLSLstd450UnpackHalf2x16:
3900 {
3901 auto val = GenericValue(this, routine, insn.word(5));
Chris Forbesd3546952019-04-30 19:32:19 -07003902 dst.move(0, halfToFloatBits(val.UInt(0) & SIMD::UInt(0x0000FFFF)));
3903 dst.move(1, halfToFloatBits((val.UInt(0) & SIMD::UInt(0xFFFF0000)) >> 16));
Chris Forbes50e64932019-04-08 17:49:27 -07003904 break;
3905 }
Ben Clayton6caf8212019-04-09 11:28:39 -04003906 case GLSLstd450Fma:
3907 {
3908 auto a = GenericValue(this, routine, insn.word(5));
3909 auto b = GenericValue(this, routine, insn.word(6));
3910 auto c = GenericValue(this, routine, insn.word(7));
3911 for (auto i = 0u; i < type.sizeInComponents; i++)
3912 {
3913 dst.move(i, FMA(a.Float(i), b.Float(i), c.Float(i)));
3914 }
3915 break;
3916 }
Ben Claytonfc77af12019-04-09 10:48:00 -04003917 case GLSLstd450Frexp:
3918 {
3919 auto val = GenericValue(this, routine, insn.word(5));
3920 auto ptrId = Object::ID(insn.word(6));
3921 auto ptrTy = getType(getObject(ptrId).type);
3922 auto ptr = GetPointerToData(ptrId, 0, routine);
3923 bool interleavedByLane = IsStorageInterleavedByLane(ptrTy.storageClass);
3924
3925 for (auto i = 0u; i < type.sizeInComponents; i++)
3926 {
3927 SIMD::Float significand;
3928 SIMD::Int exponent;
3929 std::tie(significand, exponent) = Frexp(val.Float(i));
3930
3931 dst.move(i, significand);
3932
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04003933 auto p = ptr + (i * sizeof(float));
3934 if (interleavedByLane) { p = interleaveByLane(p); }
3935 SIMD::Store(p, exponent, state->activeLaneMask());
Ben Claytonfc77af12019-04-09 10:48:00 -04003936 }
3937 break;
3938 }
3939 case GLSLstd450FrexpStruct:
3940 {
3941 auto val = GenericValue(this, routine, insn.word(5));
3942 auto numComponents = getType(val.type).sizeInComponents;
3943 for (auto i = 0u; i < numComponents; i++)
3944 {
3945 auto significandAndExponent = Frexp(val.Float(i));
3946 dst.move(i, significandAndExponent.first);
3947 dst.move(i + numComponents, significandAndExponent.second);
3948 }
3949 break;
3950 }
Ben Clayton20f6ba82019-04-09 12:07:29 -04003951 case GLSLstd450Ldexp:
3952 {
3953 auto significand = GenericValue(this, routine, insn.word(5));
3954 auto exponent = GenericValue(this, routine, insn.word(6));
3955 for (auto i = 0u; i < type.sizeInComponents; i++)
3956 {
3957 // Assumes IEEE 754
3958 auto significandExponent = Exponent(significand.Float(i));
3959 auto combinedExponent = exponent.Int(i) + significandExponent;
3960 SIMD::UInt v = (significand.UInt(i) & SIMD::UInt(0x807FFFFF)) |
3961 (SIMD::UInt(combinedExponent + SIMD::Int(126)) << SIMD::UInt(23));
3962 dst.move(i, As<SIMD::Float>(v));
3963 }
3964 break;
3965 }
Ben Claytona15fcf42019-04-09 13:04:51 -04003966 case GLSLstd450Radians:
3967 {
3968 auto degrees = GenericValue(this, routine, insn.word(5));
3969 for (auto i = 0u; i < type.sizeInComponents; i++)
3970 {
3971 dst.move(i, degrees.Float(i) * SIMD::Float(PI / 180.0f));
3972 }
3973 break;
3974 }
Ben Clayton251bc282019-04-09 13:05:52 -04003975 case GLSLstd450Degrees:
3976 {
3977 auto radians = GenericValue(this, routine, insn.word(5));
3978 for (auto i = 0u; i < type.sizeInComponents; i++)
3979 {
3980 dst.move(i, radians.Float(i) * SIMD::Float(180.0f / PI));
3981 }
3982 break;
3983 }
Ben Claytona2c8b772019-04-09 13:42:36 -04003984 case GLSLstd450Sin:
3985 {
3986 auto radians = GenericValue(this, routine, insn.word(5));
3987 for (auto i = 0u; i < type.sizeInComponents; i++)
3988 {
3989 dst.move(i, Sin(radians.Float(i)));
3990 }
3991 break;
3992 }
Ben Clayton1b6f8c72019-04-09 13:47:43 -04003993 case GLSLstd450Cos:
3994 {
3995 auto radians = GenericValue(this, routine, insn.word(5));
3996 for (auto i = 0u; i < type.sizeInComponents; i++)
3997 {
3998 dst.move(i, Cos(radians.Float(i)));
3999 }
4000 break;
4001 }
Ben Clayton14740062019-04-09 13:48:41 -04004002 case GLSLstd450Tan:
4003 {
4004 auto radians = GenericValue(this, routine, insn.word(5));
4005 for (auto i = 0u; i < type.sizeInComponents; i++)
4006 {
4007 dst.move(i, Tan(radians.Float(i)));
4008 }
4009 break;
4010 }
Ben Claytonf9350d72019-04-09 14:19:02 -04004011 case GLSLstd450Asin:
4012 {
4013 auto val = GenericValue(this, routine, insn.word(5));
4014 for (auto i = 0u; i < type.sizeInComponents; i++)
4015 {
4016 dst.move(i, Asin(val.Float(i)));
4017 }
4018 break;
4019 }
Ben Claytoneafae472019-04-09 14:22:38 -04004020 case GLSLstd450Acos:
4021 {
4022 auto val = GenericValue(this, routine, insn.word(5));
4023 for (auto i = 0u; i < type.sizeInComponents; i++)
4024 {
4025 dst.move(i, Acos(val.Float(i)));
4026 }
4027 break;
4028 }
Ben Clayton749b4e02019-04-09 14:27:43 -04004029 case GLSLstd450Atan:
4030 {
4031 auto val = GenericValue(this, routine, insn.word(5));
4032 for (auto i = 0u; i < type.sizeInComponents; i++)
4033 {
4034 dst.move(i, Atan(val.Float(i)));
4035 }
4036 break;
4037 }
Ben Claytond9636972019-04-09 15:09:54 -04004038 case GLSLstd450Sinh:
4039 {
4040 auto val = GenericValue(this, routine, insn.word(5));
4041 for (auto i = 0u; i < type.sizeInComponents; i++)
4042 {
4043 dst.move(i, Sinh(val.Float(i)));
4044 }
4045 break;
4046 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004047 case GLSLstd450Cosh:
4048 {
Ben Clayton900ea2c2019-04-09 15:25:36 -04004049 auto val = GenericValue(this, routine, insn.word(5));
4050 for (auto i = 0u; i < type.sizeInComponents; i++)
4051 {
4052 dst.move(i, Cosh(val.Float(i)));
4053 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004054 break;
4055 }
4056 case GLSLstd450Tanh:
4057 {
Ben Clayton3928bd92019-04-09 15:27:41 -04004058 auto val = GenericValue(this, routine, insn.word(5));
4059 for (auto i = 0u; i < type.sizeInComponents; i++)
4060 {
4061 dst.move(i, Tanh(val.Float(i)));
4062 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004063 break;
4064 }
4065 case GLSLstd450Asinh:
4066 {
Ben Claytonf6d77ab2019-04-09 15:30:04 -04004067 auto val = GenericValue(this, routine, insn.word(5));
4068 for (auto i = 0u; i < type.sizeInComponents; i++)
4069 {
4070 dst.move(i, Asinh(val.Float(i)));
4071 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004072 break;
4073 }
4074 case GLSLstd450Acosh:
4075 {
Ben Clayton28ebcb02019-04-09 15:33:38 -04004076 auto val = GenericValue(this, routine, insn.word(5));
4077 for (auto i = 0u; i < type.sizeInComponents; i++)
4078 {
4079 dst.move(i, Acosh(val.Float(i)));
4080 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004081 break;
4082 }
4083 case GLSLstd450Atanh:
4084 {
Ben Claytonfa6a5392019-04-09 15:35:24 -04004085 auto val = GenericValue(this, routine, insn.word(5));
4086 for (auto i = 0u; i < type.sizeInComponents; i++)
4087 {
4088 dst.move(i, Atanh(val.Float(i)));
4089 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004090 break;
4091 }
4092 case GLSLstd450Atan2:
4093 {
Ben Claytona520c3e2019-04-09 15:43:45 -04004094 auto x = GenericValue(this, routine, insn.word(5));
4095 auto y = GenericValue(this, routine, insn.word(6));
4096 for (auto i = 0u; i < type.sizeInComponents; i++)
4097 {
4098 dst.move(i, Atan2(x.Float(i), y.Float(i)));
4099 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004100 break;
4101 }
4102 case GLSLstd450Pow:
4103 {
Ben Claytonbfe94f02019-04-09 15:52:12 -04004104 auto x = GenericValue(this, routine, insn.word(5));
4105 auto y = GenericValue(this, routine, insn.word(6));
4106 for (auto i = 0u; i < type.sizeInComponents; i++)
4107 {
4108 dst.move(i, Pow(x.Float(i), y.Float(i)));
4109 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004110 break;
4111 }
4112 case GLSLstd450Exp:
4113 {
Ben Clayton242f0022019-04-09 16:00:53 -04004114 auto val = GenericValue(this, routine, insn.word(5));
4115 for (auto i = 0u; i < type.sizeInComponents; i++)
4116 {
4117 dst.move(i, Exp(val.Float(i)));
4118 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004119 break;
4120 }
4121 case GLSLstd450Log:
4122 {
Ben Clayton2c1da722019-04-09 16:03:03 -04004123 auto val = GenericValue(this, routine, insn.word(5));
4124 for (auto i = 0u; i < type.sizeInComponents; i++)
4125 {
4126 dst.move(i, Log(val.Float(i)));
4127 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004128 break;
4129 }
4130 case GLSLstd450Exp2:
4131 {
Ben Claytonf40b56c2019-04-09 16:06:55 -04004132 auto val = GenericValue(this, routine, insn.word(5));
4133 for (auto i = 0u; i < type.sizeInComponents; i++)
4134 {
4135 dst.move(i, Exp2(val.Float(i)));
4136 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004137 break;
4138 }
4139 case GLSLstd450Log2:
4140 {
Ben Claytone17acfe2019-04-09 16:09:13 -04004141 auto val = GenericValue(this, routine, insn.word(5));
4142 for (auto i = 0u; i < type.sizeInComponents; i++)
4143 {
4144 dst.move(i, Log2(val.Float(i)));
4145 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004146 break;
4147 }
4148 case GLSLstd450Sqrt:
4149 {
Ben Clayton6517ad22019-04-09 16:11:40 -04004150 auto val = GenericValue(this, routine, insn.word(5));
4151 for (auto i = 0u; i < type.sizeInComponents; i++)
4152 {
4153 dst.move(i, Sqrt(val.Float(i)));
4154 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004155 break;
4156 }
4157 case GLSLstd450InverseSqrt:
4158 {
Ben Clayton93451852019-04-09 16:25:30 -04004159 auto val = GenericValue(this, routine, insn.word(5));
4160 Decorations d;
4161 ApplyDecorationsForId(&d, insn.word(5));
4162 if (d.RelaxedPrecision)
4163 {
4164 for (auto i = 0u; i < type.sizeInComponents; i++)
4165 {
4166 dst.move(i, RcpSqrt_pp(val.Float(i)));
4167 }
4168 }
4169 else
4170 {
4171 for (auto i = 0u; i < type.sizeInComponents; i++)
4172 {
4173 dst.move(i, SIMD::Float(1.0f) / Sqrt(val.Float(i)));
4174 }
4175 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004176 break;
4177 }
4178 case GLSLstd450Determinant:
4179 {
Ben Clayton1fb633c2019-04-09 17:24:59 -04004180 auto mat = GenericValue(this, routine, insn.word(5));
4181 auto numComponents = getType(mat.type).sizeInComponents;
4182 switch (numComponents)
4183 {
4184 case 4: // 2x2
4185 dst.move(0, Determinant(
4186 mat.Float(0), mat.Float(1),
4187 mat.Float(2), mat.Float(3)));
4188 break;
4189 case 9: // 3x3
4190 dst.move(0, Determinant(
4191 mat.Float(0), mat.Float(1), mat.Float(2),
4192 mat.Float(3), mat.Float(4), mat.Float(5),
4193 mat.Float(6), mat.Float(7), mat.Float(8)));
4194 break;
4195 case 16: // 4x4
4196 dst.move(0, Determinant(
4197 mat.Float(0), mat.Float(1), mat.Float(2), mat.Float(3),
4198 mat.Float(4), mat.Float(5), mat.Float(6), mat.Float(7),
4199 mat.Float(8), mat.Float(9), mat.Float(10), mat.Float(11),
4200 mat.Float(12), mat.Float(13), mat.Float(14), mat.Float(15)));
4201 break;
4202 default:
4203 UNREACHABLE("GLSLstd450Determinant can only operate with square matrices. Got %d elements", int(numComponents));
4204 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004205 break;
4206 }
4207 case GLSLstd450MatrixInverse:
4208 {
Ben Clayton445a44a2019-04-10 16:37:19 -04004209 auto mat = GenericValue(this, routine, insn.word(5));
4210 auto numComponents = getType(mat.type).sizeInComponents;
4211 switch (numComponents)
4212 {
4213 case 4: // 2x2
4214 {
4215 auto inv = MatrixInverse(
4216 mat.Float(0), mat.Float(1),
4217 mat.Float(2), mat.Float(3));
4218 for (uint32_t i = 0; i < inv.size(); i++)
4219 {
4220 dst.move(i, inv[i]);
4221 }
4222 break;
4223 }
4224 case 9: // 3x3
4225 {
4226 auto inv = MatrixInverse(
4227 mat.Float(0), mat.Float(1), mat.Float(2),
4228 mat.Float(3), mat.Float(4), mat.Float(5),
4229 mat.Float(6), mat.Float(7), mat.Float(8));
4230 for (uint32_t i = 0; i < inv.size(); i++)
4231 {
4232 dst.move(i, inv[i]);
4233 }
4234 break;
4235 }
4236 case 16: // 4x4
4237 {
4238 auto inv = MatrixInverse(
4239 mat.Float(0), mat.Float(1), mat.Float(2), mat.Float(3),
4240 mat.Float(4), mat.Float(5), mat.Float(6), mat.Float(7),
4241 mat.Float(8), mat.Float(9), mat.Float(10), mat.Float(11),
4242 mat.Float(12), mat.Float(13), mat.Float(14), mat.Float(15));
4243 for (uint32_t i = 0; i < inv.size(); i++)
4244 {
4245 dst.move(i, inv[i]);
4246 }
4247 break;
4248 }
4249 default:
4250 UNREACHABLE("GLSLstd450MatrixInverse can only operate with square matrices. Got %d elements", int(numComponents));
4251 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004252 break;
4253 }
4254 case GLSLstd450IMix:
4255 {
Ben Clayton238fec32019-04-09 16:27:56 -04004256 UNREACHABLE("GLSLstd450IMix has been removed from the specification");
Ben Clayton91fd0e22019-04-09 15:19:39 -04004257 break;
4258 }
4259 case GLSLstd450PackDouble2x32:
4260 {
Ben Clayton92797c22019-04-25 10:44:03 +01004261 UNSUPPORTED("SPIR-V Float64 Capability (GLSLstd450PackDouble2x32)");
Ben Clayton91fd0e22019-04-09 15:19:39 -04004262 break;
4263 }
4264 case GLSLstd450UnpackDouble2x32:
4265 {
Ben Clayton92797c22019-04-25 10:44:03 +01004266 UNSUPPORTED("SPIR-V Float64 Capability (GLSLstd450UnpackDouble2x32)");
Ben Clayton91fd0e22019-04-09 15:19:39 -04004267 break;
4268 }
4269 case GLSLstd450FindILsb:
4270 {
Ben Clayton3f007c42019-04-10 14:54:23 -04004271 auto val = GenericValue(this, routine, insn.word(5));
4272 for (auto i = 0u; i < type.sizeInComponents; i++)
4273 {
4274 auto v = val.UInt(i);
4275 dst.move(i, Cttz(v, true) | CmpEQ(v, SIMD::UInt(0)));
4276 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004277 break;
4278 }
4279 case GLSLstd450FindSMsb:
4280 {
Ben Clayton60958262019-04-10 14:53:30 -04004281 auto val = GenericValue(this, routine, insn.word(5));
4282 for (auto i = 0u; i < type.sizeInComponents; i++)
4283 {
4284 auto v = val.UInt(i) ^ As<SIMD::UInt>(CmpLT(val.Int(i), SIMD::Int(0)));
4285 dst.move(i, SIMD::UInt(31) - Ctlz(v, false));
4286 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004287 break;
4288 }
4289 case GLSLstd450FindUMsb:
4290 {
Ben Clayton60958262019-04-10 14:53:30 -04004291 auto val = GenericValue(this, routine, insn.word(5));
4292 for (auto i = 0u; i < type.sizeInComponents; i++)
4293 {
4294 dst.move(i, SIMD::UInt(31) - Ctlz(val.UInt(i), false));
4295 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004296 break;
4297 }
4298 case GLSLstd450InterpolateAtCentroid:
4299 {
Ben Clayton92797c22019-04-25 10:44:03 +01004300 UNSUPPORTED("SPIR-V SampleRateShading Capability (GLSLstd450InterpolateAtCentroid)");
Ben Clayton91fd0e22019-04-09 15:19:39 -04004301 break;
4302 }
4303 case GLSLstd450InterpolateAtSample:
4304 {
Ben Clayton92797c22019-04-25 10:44:03 +01004305 UNSUPPORTED("SPIR-V SampleRateShading Capability (GLSLstd450InterpolateAtCentroid)");
Ben Clayton91fd0e22019-04-09 15:19:39 -04004306 break;
4307 }
4308 case GLSLstd450InterpolateAtOffset:
4309 {
Ben Clayton92797c22019-04-25 10:44:03 +01004310 UNSUPPORTED("SPIR-V SampleRateShading Capability (GLSLstd450InterpolateAtCentroid)");
Ben Clayton91fd0e22019-04-09 15:19:39 -04004311 break;
4312 }
4313 case GLSLstd450NMin:
4314 {
Ben Claytonee10bcf2019-04-09 17:01:01 -04004315 auto x = GenericValue(this, routine, insn.word(5));
4316 auto y = GenericValue(this, routine, insn.word(6));
4317 for (auto i = 0u; i < type.sizeInComponents; i++)
4318 {
4319 dst.move(i, NMin(x.Float(i), y.Float(i)));
4320 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004321 break;
4322 }
4323 case GLSLstd450NMax:
4324 {
Ben Clayton02de7e02019-04-09 17:01:26 -04004325 auto x = GenericValue(this, routine, insn.word(5));
4326 auto y = GenericValue(this, routine, insn.word(6));
4327 for (auto i = 0u; i < type.sizeInComponents; i++)
4328 {
4329 dst.move(i, NMax(x.Float(i), y.Float(i)));
4330 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004331 break;
4332 }
4333 case GLSLstd450NClamp:
4334 {
Ben Clayton4d633122019-04-09 17:02:34 -04004335 auto x = GenericValue(this, routine, insn.word(5));
4336 auto minVal = GenericValue(this, routine, insn.word(6));
4337 auto maxVal = GenericValue(this, routine, insn.word(7));
4338 for (auto i = 0u; i < type.sizeInComponents; i++)
4339 {
4340 auto clamp = NMin(NMax(x.Float(i), minVal.Float(i)), maxVal.Float(i));
4341 dst.move(i, clamp);
4342 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004343 break;
4344 }
Chris Forbes9667a5b2019-03-07 09:26:48 -08004345 default:
Ben Clayton92797c22019-04-25 10:44:03 +01004346 UNREACHABLE("ExtInst %d", int(extInstIndex));
Ben Clayton91fd0e22019-04-09 15:19:39 -04004347 break;
Chris Forbes9667a5b2019-03-07 09:26:48 -08004348 }
Ben Claytonc0cf68b2019-03-21 17:46:08 +00004349
4350 return EmitResult::Continue;
Chris Forbes9667a5b2019-03-07 09:26:48 -08004351 }
4352
Nicolas Capens86509d92019-03-21 13:23:50 -04004353 std::memory_order SpirvShader::MemoryOrder(spv::MemorySemanticsMask memorySemantics)
4354 {
Ben Claytonb16c5862019-05-08 14:01:38 +01004355 auto control = static_cast<uint32_t>(memorySemantics) & static_cast<uint32_t>(
4356 spv::MemorySemanticsAcquireMask |
4357 spv::MemorySemanticsReleaseMask |
4358 spv::MemorySemanticsAcquireReleaseMask |
4359 spv::MemorySemanticsSequentiallyConsistentMask
4360 );
4361 switch (control)
Nicolas Capens86509d92019-03-21 13:23:50 -04004362 {
4363 case spv::MemorySemanticsMaskNone: return std::memory_order_relaxed;
4364 case spv::MemorySemanticsAcquireMask: return std::memory_order_acquire;
4365 case spv::MemorySemanticsReleaseMask: return std::memory_order_release;
4366 case spv::MemorySemanticsAcquireReleaseMask: return std::memory_order_acq_rel;
4367 case spv::MemorySemanticsSequentiallyConsistentMask: return std::memory_order_acq_rel; // Vulkan 1.1: "SequentiallyConsistent is treated as AcquireRelease"
4368 default:
Ben Claytonb16c5862019-05-08 14:01:38 +01004369 // "it is invalid for more than one of these four bits to be set:
4370 // Acquire, Release, AcquireRelease, or SequentiallyConsistent."
4371 UNREACHABLE("MemorySemanticsMask: %x", int(control));
Nicolas Capens86509d92019-03-21 13:23:50 -04004372 return std::memory_order_acq_rel;
4373 }
4374 }
4375
Chris Forbes868ed902019-03-13 17:39:45 -07004376 SIMD::Float SpirvShader::Dot(unsigned numComponents, GenericValue const & x, GenericValue const & y) const
4377 {
Ben Clayton9b62c5e2019-03-08 09:32:34 +00004378 SIMD::Float d = x.Float(0) * y.Float(0);
Chris Forbes868ed902019-03-13 17:39:45 -07004379
4380 for (auto i = 1u; i < numComponents; i++)
4381 {
Ben Clayton9b62c5e2019-03-08 09:32:34 +00004382 d += x.Float(i) * y.Float(i);
Chris Forbes868ed902019-03-13 17:39:45 -07004383 }
4384
4385 return d;
4386 }
4387
Chris Forbes50e64932019-04-08 17:49:27 -07004388 SIMD::UInt SpirvShader::FloatToHalfBits(SIMD::UInt floatBits, bool storeInUpperBits) const
4389 {
4390 static const uint32_t mask_sign = 0x80000000u;
4391 static const uint32_t mask_round = ~0xfffu;
4392 static const uint32_t c_f32infty = 255 << 23;
4393 static const uint32_t c_magic = 15 << 23;
4394 static const uint32_t c_nanbit = 0x200;
4395 static const uint32_t c_infty_as_fp16 = 0x7c00;
4396 static const uint32_t c_clamp = (31 << 23) - 0x1000;
4397
4398 SIMD::UInt justsign = SIMD::UInt(mask_sign) & floatBits;
4399 SIMD::UInt absf = floatBits ^ justsign;
4400 SIMD::UInt b_isnormal = CmpNLE(SIMD::UInt(c_f32infty), absf);
4401
4402 // Note: this version doesn't round to the nearest even in case of a tie as defined by IEEE 754-2008, it rounds to +inf
4403 // instead of nearest even, since that's fine for GLSL ES 3.0's needs (see section 2.1.1 Floating-Point Computation)
4404 SIMD::UInt joined = ((((As<SIMD::UInt>(Min(As<SIMD::Float>(absf & SIMD::UInt(mask_round)) * As<SIMD::Float>(SIMD::UInt(c_magic)),
4405 As<SIMD::Float>(SIMD::UInt(c_clamp))))) - SIMD::UInt(mask_round)) >> 13) & b_isnormal) |
4406 ((b_isnormal ^ SIMD::UInt(0xFFFFFFFF)) & ((CmpNLE(absf, SIMD::UInt(c_f32infty)) & SIMD::UInt(c_nanbit)) |
4407 SIMD::UInt(c_infty_as_fp16)));
4408
4409 return storeInUpperBits ? ((joined << 16) | justsign) : joined | (justsign >> 16);
4410 }
4411
Ben Claytonfc77af12019-04-09 10:48:00 -04004412 std::pair<SIMD::Float, SIMD::Int> SpirvShader::Frexp(RValue<SIMD::Float> val) const
4413 {
4414 // Assumes IEEE 754
4415 auto v = As<SIMD::UInt>(val);
4416 auto isNotZero = CmpNEQ(v & SIMD::UInt(0x7FFFFFFF), SIMD::UInt(0));
4417 auto zeroSign = v & SIMD::UInt(0x80000000) & ~isNotZero;
Ben Claytonf41ca6b2019-04-10 22:33:00 +01004418 auto significand = As<SIMD::Float>((((v & SIMD::UInt(0x807FFFFF)) | SIMD::UInt(0x3F000000)) & isNotZero) | zeroSign);
Ben Clayton20f6ba82019-04-09 12:07:29 -04004419 auto exponent = Exponent(val) & SIMD::Int(isNotZero);
Ben Claytonfc77af12019-04-09 10:48:00 -04004420 return std::make_pair(significand, exponent);
4421 }
4422
Ben Claytonc0cf68b2019-03-21 17:46:08 +00004423 SpirvShader::EmitResult SpirvShader::EmitAny(InsnIterator insn, EmitState *state) const
Chris Forbes0785f692019-03-08 09:09:18 -08004424 {
Ben Claytonc0cf68b2019-03-21 17:46:08 +00004425 auto routine = state->routine;
Chris Forbes0785f692019-03-08 09:09:18 -08004426 auto &type = getType(insn.word(1));
Ben Claytonaf26cfe2019-03-21 17:32:44 +00004427 ASSERT(type.sizeInComponents == 1);
Chris Forbes0785f692019-03-08 09:09:18 -08004428 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
4429 auto &srcType = getType(getObject(insn.word(3)).type);
4430 auto src = GenericValue(this, routine, insn.word(3));
4431
Ben Clayton9b62c5e2019-03-08 09:32:34 +00004432 SIMD::UInt result = src.UInt(0);
Chris Forbes0785f692019-03-08 09:09:18 -08004433
4434 for (auto i = 1u; i < srcType.sizeInComponents; i++)
4435 {
Ben Clayton9b62c5e2019-03-08 09:32:34 +00004436 result |= src.UInt(i);
Chris Forbes0785f692019-03-08 09:09:18 -08004437 }
4438
Nicolas Capens80c796b2019-03-19 21:38:44 -04004439 dst.move(0, result);
Ben Claytonc0cf68b2019-03-21 17:46:08 +00004440 return EmitResult::Continue;
Chris Forbes0785f692019-03-08 09:09:18 -08004441 }
4442
Ben Claytonc0cf68b2019-03-21 17:46:08 +00004443 SpirvShader::EmitResult SpirvShader::EmitAll(InsnIterator insn, EmitState *state) const
Chris Forbes0785f692019-03-08 09:09:18 -08004444 {
Ben Claytonc0cf68b2019-03-21 17:46:08 +00004445 auto routine = state->routine;
Chris Forbes0785f692019-03-08 09:09:18 -08004446 auto &type = getType(insn.word(1));
Ben Claytonaf26cfe2019-03-21 17:32:44 +00004447 ASSERT(type.sizeInComponents == 1);
Chris Forbes0785f692019-03-08 09:09:18 -08004448 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
4449 auto &srcType = getType(getObject(insn.word(3)).type);
4450 auto src = GenericValue(this, routine, insn.word(3));
4451
Ben Clayton9b62c5e2019-03-08 09:32:34 +00004452 SIMD::UInt result = src.UInt(0);
Chris Forbes0785f692019-03-08 09:09:18 -08004453
4454 for (auto i = 1u; i < srcType.sizeInComponents; i++)
4455 {
Ben Clayton9b62c5e2019-03-08 09:32:34 +00004456 result &= src.UInt(i);
Chris Forbes0785f692019-03-08 09:09:18 -08004457 }
4458
Nicolas Capens80c796b2019-03-19 21:38:44 -04004459 dst.move(0, result);
Ben Claytonc0cf68b2019-03-21 17:46:08 +00004460 return EmitResult::Continue;
Chris Forbes0785f692019-03-08 09:09:18 -08004461 }
4462
Ben Claytonc0cf68b2019-03-21 17:46:08 +00004463 SpirvShader::EmitResult SpirvShader::EmitBranch(InsnIterator insn, EmitState *state) const
Ben Claytone37ce612019-03-13 19:57:42 +00004464 {
Ben Claytonc0cf68b2019-03-21 17:46:08 +00004465 auto target = Block::ID(insn.word(1));
Ben Clayton0e976bc2019-05-13 13:53:49 +01004466 state->addActiveLaneMaskEdge(state->currentBlock, target, state->activeLaneMask());
Ben Claytonc0cf68b2019-03-21 17:46:08 +00004467 return EmitResult::Terminator;
Ben Claytone37ce612019-03-13 19:57:42 +00004468 }
4469
Ben Clayton9fd02e02019-03-21 18:47:15 +00004470 SpirvShader::EmitResult SpirvShader::EmitBranchConditional(InsnIterator insn, EmitState *state) const
4471 {
4472 auto block = getBlock(state->currentBlock);
4473 ASSERT(block.branchInstruction == insn);
4474
4475 auto condId = Object::ID(block.branchInstruction.word(1));
4476 auto trueBlockId = Block::ID(block.branchInstruction.word(2));
4477 auto falseBlockId = Block::ID(block.branchInstruction.word(3));
4478
4479 auto cond = GenericValue(this, state->routine, condId);
Ben Clayton16ab9e92019-04-08 10:57:35 -04004480 ASSERT_MSG(getType(cond.type).sizeInComponents == 1, "Condition must be a Boolean type scalar");
Ben Clayton9fd02e02019-03-21 18:47:15 +00004481
4482 // TODO: Optimize for case where all lanes take same path.
4483
4484 state->addOutputActiveLaneMaskEdge(trueBlockId, cond.Int(0));
4485 state->addOutputActiveLaneMaskEdge(falseBlockId, ~cond.Int(0));
4486
4487 return EmitResult::Terminator;
4488 }
4489
Ben Clayton213a8ce2019-03-21 18:57:23 +00004490 SpirvShader::EmitResult SpirvShader::EmitSwitch(InsnIterator insn, EmitState *state) const
4491 {
4492 auto block = getBlock(state->currentBlock);
4493 ASSERT(block.branchInstruction == insn);
4494
4495 auto selId = Object::ID(block.branchInstruction.word(1));
4496
4497 auto sel = GenericValue(this, state->routine, selId);
Ben Clayton16ab9e92019-04-08 10:57:35 -04004498 ASSERT_MSG(getType(sel.type).sizeInComponents == 1, "Selector must be a scalar");
Ben Clayton213a8ce2019-03-21 18:57:23 +00004499
4500 auto numCases = (block.branchInstruction.wordCount() - 3) / 2;
4501
4502 // TODO: Optimize for case where all lanes take same path.
4503
4504 SIMD::Int defaultLaneMask = state->activeLaneMask();
4505
4506 // Gather up the case label matches and calculate defaultLaneMask.
4507 std::vector<RValue<SIMD::Int>> caseLabelMatches;
4508 caseLabelMatches.reserve(numCases);
4509 for (uint32_t i = 0; i < numCases; i++)
4510 {
4511 auto label = block.branchInstruction.word(i * 2 + 3);
4512 auto caseBlockId = Block::ID(block.branchInstruction.word(i * 2 + 4));
4513 auto caseLabelMatch = CmpEQ(sel.Int(0), SIMD::Int(label));
4514 state->addOutputActiveLaneMaskEdge(caseBlockId, caseLabelMatch);
4515 defaultLaneMask &= ~caseLabelMatch;
4516 }
4517
4518 auto defaultBlockId = Block::ID(block.branchInstruction.word(2));
4519 state->addOutputActiveLaneMaskEdge(defaultBlockId, defaultLaneMask);
4520
4521 return EmitResult::Terminator;
4522 }
Ben Clayton9fd02e02019-03-21 18:47:15 +00004523
4524 SpirvShader::EmitResult SpirvShader::EmitUnreachable(InsnIterator insn, EmitState *state) const
4525 {
4526 // TODO: Log something in this case?
4527 state->setActiveLaneMask(SIMD::Int(0));
4528 return EmitResult::Terminator;
4529 }
4530
4531 SpirvShader::EmitResult SpirvShader::EmitReturn(InsnIterator insn, EmitState *state) const
4532 {
4533 state->setActiveLaneMask(SIMD::Int(0));
4534 return EmitResult::Terminator;
4535 }
4536
Chris Forbes97e95892019-04-02 13:37:37 +13004537 SpirvShader::EmitResult SpirvShader::EmitKill(InsnIterator insn, EmitState *state) const
4538 {
4539 state->routine->killMask |= SignMask(state->activeLaneMask());
4540 state->setActiveLaneMask(SIMD::Int(0));
4541 return EmitResult::Terminator;
4542 }
4543
Ben Clayton9fd02e02019-03-21 18:47:15 +00004544 SpirvShader::EmitResult SpirvShader::EmitPhi(InsnIterator insn, EmitState *state) const
4545 {
Ben Clayton69c37492019-05-13 17:31:16 +01004546 auto currentBlock = getBlock(state->currentBlock);
4547 StorePhi(insn, state, currentBlock.ins);
4548 LoadPhi(insn, state);
4549 return EmitResult::Continue;
4550 }
4551
4552 void SpirvShader::LoadPhi(InsnIterator insn, EmitState *state) const
4553 {
4554 auto routine = state->routine;
4555 auto typeId = Type::ID(insn.word(1));
4556 auto type = getType(typeId);
4557 auto objectId = Object::ID(insn.word(2));
4558
4559 auto storageIt = state->routine->phis.find(objectId);
4560 ASSERT(storageIt != state->routine->phis.end());
4561 auto &storage = storageIt->second;
4562
4563 auto &dst = routine->createIntermediate(objectId, type.sizeInComponents);
4564 for(uint32_t i = 0; i < type.sizeInComponents; i++)
4565 {
4566 dst.move(i, storage[i]);
4567 }
4568 }
4569
4570 void SpirvShader::StorePhi(InsnIterator insn, EmitState *state, std::unordered_set<SpirvShader::Block::ID> const& filter) const
4571 {
Ben Clayton9fd02e02019-03-21 18:47:15 +00004572 auto routine = state->routine;
4573 auto typeId = Type::ID(insn.word(1));
4574 auto type = getType(typeId);
4575 auto objectId = Object::ID(insn.word(2));
Ben Clayton5d143aa2019-04-03 13:30:14 +01004576 auto currentBlock = getBlock(state->currentBlock);
Ben Clayton9fd02e02019-03-21 18:47:15 +00004577
Ben Clayton69c37492019-05-13 17:31:16 +01004578 auto storageIt = state->routine->phis.find(objectId);
4579 ASSERT(storageIt != state->routine->phis.end());
4580 auto &storage = storageIt->second;
Ben Clayton9fd02e02019-03-21 18:47:15 +00004581
Ben Clayton9fd02e02019-03-21 18:47:15 +00004582 for (uint32_t w = 3; w < insn.wordCount(); w += 2)
4583 {
4584 auto varId = Object::ID(insn.word(w + 0));
4585 auto blockId = Block::ID(insn.word(w + 1));
4586
Ben Clayton69c37492019-05-13 17:31:16 +01004587 if (filter.count(blockId) == 0)
Ben Clayton5d143aa2019-04-03 13:30:14 +01004588 {
Ben Clayton69c37492019-05-13 17:31:16 +01004589 continue;
Ben Clayton5d143aa2019-04-03 13:30:14 +01004590 }
4591
Ben Claytonfe3f0132019-03-26 11:10:16 +00004592 auto mask = GetActiveLaneMaskEdge(state, blockId, state->currentBlock);
Ben Clayton69c37492019-05-13 17:31:16 +01004593 auto in = GenericValue(this, routine, varId);
Ben Clayton9fd02e02019-03-21 18:47:15 +00004594
4595 for (uint32_t i = 0; i < type.sizeInComponents; i++)
4596 {
Ben Clayton69c37492019-05-13 17:31:16 +01004597 storage[i] = As<SIMD::Float>((As<SIMD::Int>(storage[i]) & ~mask) | (in.Int(i) & mask));
Ben Clayton9fd02e02019-03-21 18:47:15 +00004598 }
Ben Clayton9fd02e02019-03-21 18:47:15 +00004599 }
Ben Clayton9fd02e02019-03-21 18:47:15 +00004600 }
4601
Nicolas Capens5b09dd12019-04-30 01:05:28 -04004602 SpirvShader::EmitResult SpirvShader::EmitImageSampleImplicitLod(Variant variant, InsnIterator insn, EmitState *state) const
Nicolas Capens7d867272019-04-08 22:51:08 -04004603 {
Nicolas Capens5b09dd12019-04-30 01:05:28 -04004604 return EmitImageSample({variant, Implicit}, insn, state);
Nicolas Capens125dba02019-04-24 02:03:22 -04004605 }
4606
Nicolas Capens5b09dd12019-04-30 01:05:28 -04004607 SpirvShader::EmitResult SpirvShader::EmitImageSampleExplicitLod(Variant variant, InsnIterator insn, EmitState *state) const
Nicolas Capens125dba02019-04-24 02:03:22 -04004608 {
Chris Forbes8ef6d1f2019-05-10 10:19:59 -07004609 auto isDref = (variant == Dref) || (variant == ProjDref);
4610 uint32_t imageOperands = static_cast<spv::ImageOperandsMask>(insn.word(isDref ? 6 : 5));
Nicolas Capens4bade2e2019-04-30 16:21:06 -04004611 imageOperands &= ~spv::ImageOperandsConstOffsetMask; // Dealt with later.
Nicolas Capens78896332019-04-29 16:41:50 -04004612
Nicolas Capens420d9da2019-04-26 17:44:42 -04004613 if((imageOperands & spv::ImageOperandsLodMask) == imageOperands)
4614 {
Nicolas Capens5b09dd12019-04-30 01:05:28 -04004615 return EmitImageSample({variant, Lod}, insn, state);
Nicolas Capens420d9da2019-04-26 17:44:42 -04004616 }
4617 else if((imageOperands & spv::ImageOperandsGradMask) == imageOperands)
4618 {
Nicolas Capens5b09dd12019-04-30 01:05:28 -04004619 return EmitImageSample({variant, Grad}, insn, state);
Nicolas Capens420d9da2019-04-26 17:44:42 -04004620 }
4621 else UNIMPLEMENTED("Image Operands %x", imageOperands);
4622 return EmitResult::Continue;
Nicolas Capens125dba02019-04-24 02:03:22 -04004623 }
4624
Chris Forbescd631592019-04-27 10:37:18 -07004625 SpirvShader::EmitResult SpirvShader::EmitImageFetch(InsnIterator insn, EmitState *state) const
4626 {
Chris Forbes6f1b7652019-04-30 13:01:47 -07004627 return EmitImageSample({None, Fetch}, insn, state);
Chris Forbescd631592019-04-27 10:37:18 -07004628 }
4629
Nicolas Capens78896332019-04-29 16:41:50 -04004630 SpirvShader::EmitResult SpirvShader::EmitImageSample(ImageInstruction instruction, InsnIterator insn, EmitState *state) const
Nicolas Capens125dba02019-04-24 02:03:22 -04004631 {
Nicolas Capens7d867272019-04-08 22:51:08 -04004632 Type::ID resultTypeId = insn.word(1);
4633 Object::ID resultId = insn.word(2);
4634 Object::ID sampledImageId = insn.word(3);
4635 Object::ID coordinateId = insn.word(4);
4636 auto &resultType = getType(resultTypeId);
4637
4638 auto &result = state->routine->createIntermediate(resultId, resultType.sizeInComponents);
Chris Forbesfa82c342019-04-26 16:42:38 -07004639 auto imageDescriptor = state->routine->getPointer(sampledImageId).base; // vk::SampledImageDescriptor*
4640
4641 // If using a separate sampler, look through the OpSampledImage instruction to find the sampler descriptor
4642 auto &sampledImage = getObject(sampledImageId);
4643 auto samplerDescriptor = (sampledImage.opcode() == spv::OpSampledImage) ?
4644 state->routine->getPointer(sampledImage.definition.word(4)).base : imageDescriptor;
4645
Nicolas Capens7d867272019-04-08 22:51:08 -04004646 auto coordinate = GenericValue(this, state->routine, coordinateId);
Nicolas Capens125dba02019-04-24 02:03:22 -04004647 auto &coordinateType = getType(coordinate.type);
Nicolas Capens7d867272019-04-08 22:51:08 -04004648
Nicolas Capens97da7822019-04-30 17:33:26 -04004649 Pointer<Byte> sampler = samplerDescriptor + OFFSET(vk::SampledImageDescriptor, sampler); // vk::Sampler*
Nicolas Capens97da7822019-04-30 17:33:26 -04004650 Pointer<Byte> texture = imageDescriptor + OFFSET(vk::SampledImageDescriptor, texture); // sw::Texture*
Nicolas Capens7d867272019-04-08 22:51:08 -04004651
Nicolas Capens125dba02019-04-24 02:03:22 -04004652 uint32_t imageOperands = spv::ImageOperandsMaskNone;
Nicolas Capens4bade2e2019-04-30 16:21:06 -04004653 bool lodOrBias = false;
4654 Object::ID lodOrBiasId = 0;
Nicolas Capens125dba02019-04-24 02:03:22 -04004655 bool grad = false;
Nicolas Capens420d9da2019-04-26 17:44:42 -04004656 Object::ID gradDxId = 0;
4657 Object::ID gradDyId = 0;
Nicolas Capens125dba02019-04-24 02:03:22 -04004658 bool constOffset = false;
Nicolas Capens022bd572019-04-29 23:45:25 -04004659 Object::ID offsetId = 0;
Nicolas Capens125dba02019-04-24 02:03:22 -04004660 bool sample = false;
4661
Chris Forbesc71c17f2019-05-04 10:01:04 -07004662 uint32_t operand = instruction.isDref() ? 6 : 5;
4663
4664 if(insn.wordCount() > operand)
Nicolas Capens125dba02019-04-24 02:03:22 -04004665 {
Chris Forbesc71c17f2019-05-04 10:01:04 -07004666 imageOperands = static_cast<spv::ImageOperandsMask>(insn.word(operand++));
Nicolas Capens125dba02019-04-24 02:03:22 -04004667
4668 if(imageOperands & spv::ImageOperandsBiasMask)
4669 {
Nicolas Capens4bade2e2019-04-30 16:21:06 -04004670 lodOrBias = true;
4671 lodOrBiasId = insn.word(operand);
4672 operand++;
Nicolas Capens125dba02019-04-24 02:03:22 -04004673 imageOperands &= ~spv::ImageOperandsBiasMask;
Nicolas Capens4bade2e2019-04-30 16:21:06 -04004674
4675 ASSERT(instruction.samplerMethod == Implicit);
4676 instruction.samplerMethod = Bias;
Nicolas Capens125dba02019-04-24 02:03:22 -04004677 }
4678
4679 if(imageOperands & spv::ImageOperandsLodMask)
4680 {
Nicolas Capens4bade2e2019-04-30 16:21:06 -04004681 lodOrBias = true;
4682 lodOrBiasId = insn.word(operand);
Nicolas Capens125dba02019-04-24 02:03:22 -04004683 operand++;
4684 imageOperands &= ~spv::ImageOperandsLodMask;
4685 }
4686
4687 if(imageOperands & spv::ImageOperandsGradMask)
4688 {
Nicolas Capens4bade2e2019-04-30 16:21:06 -04004689 ASSERT(!lodOrBias); // SPIR-V 1.3: "It is invalid to set both the Lod and Grad bits." Bias is for ImplicitLod, Grad for ExplicitLod.
Nicolas Capens125dba02019-04-24 02:03:22 -04004690 grad = true;
Nicolas Capens420d9da2019-04-26 17:44:42 -04004691 gradDxId = insn.word(operand + 0);
4692 gradDyId = insn.word(operand + 1);
4693 operand += 2;
Nicolas Capens125dba02019-04-24 02:03:22 -04004694 imageOperands &= ~spv::ImageOperandsGradMask;
4695 }
4696
4697 if(imageOperands & spv::ImageOperandsConstOffsetMask)
4698 {
Nicolas Capens125dba02019-04-24 02:03:22 -04004699 constOffset = true;
Nicolas Capens022bd572019-04-29 23:45:25 -04004700 offsetId = insn.word(operand);
4701 operand++;
Nicolas Capens125dba02019-04-24 02:03:22 -04004702 imageOperands &= ~spv::ImageOperandsConstOffsetMask;
4703 }
4704
4705 if(imageOperands & spv::ImageOperandsSampleMask)
4706 {
4707 UNIMPLEMENTED("Image operand %x", spv::ImageOperandsSampleMask); (void)sample;
4708 sample = true;
4709 imageOperands &= ~spv::ImageOperandsSampleMask;
4710 }
4711
4712 if(imageOperands != 0)
4713 {
4714 UNIMPLEMENTED("Image operand %x", imageOperands);
4715 }
4716 }
4717
Nicolas Capens420d9da2019-04-26 17:44:42 -04004718 Array<SIMD::Float> in(16); // Maximum 16 input parameter components.
Nicolas Capens125dba02019-04-24 02:03:22 -04004719
Nicolas Capens5b09dd12019-04-30 01:05:28 -04004720 uint32_t coordinates = coordinateType.sizeInComponents - instruction.isProj();
4721 instruction.coordinates = coordinates;
4722
Nicolas Capens125dba02019-04-24 02:03:22 -04004723 uint32_t i = 0;
Nicolas Capens5b09dd12019-04-30 01:05:28 -04004724 for( ; i < coordinates; i++)
Nicolas Capens125dba02019-04-24 02:03:22 -04004725 {
Nicolas Capens5b09dd12019-04-30 01:05:28 -04004726 if(instruction.isProj())
4727 {
4728 in[i] = coordinate.Float(i) / coordinate.Float(coordinates); // TODO(b/129523279): Optimize using reciprocal.
4729 }
4730 else
4731 {
4732 in[i] = coordinate.Float(i);
4733 }
4734 }
4735
4736 if(instruction.isDref())
4737 {
Chris Forbesc71c17f2019-05-04 10:01:04 -07004738 auto drefValue = GenericValue(this, state->routine, insn.word(5));
4739 in[i] = drefValue.Float(0);
4740 i++;
Nicolas Capens125dba02019-04-24 02:03:22 -04004741 }
4742
Nicolas Capens4bade2e2019-04-30 16:21:06 -04004743 if(lodOrBias)
Nicolas Capens125dba02019-04-24 02:03:22 -04004744 {
Nicolas Capens4bade2e2019-04-30 16:21:06 -04004745 auto lodValue = GenericValue(this, state->routine, lodOrBiasId);
Nicolas Capens125dba02019-04-24 02:03:22 -04004746 in[i] = lodValue.Float(0);
4747 i++;
4748 }
Nicolas Capens420d9da2019-04-26 17:44:42 -04004749 else if(grad)
4750 {
4751 auto dxValue = GenericValue(this, state->routine, gradDxId);
4752 auto dyValue = GenericValue(this, state->routine, gradDyId);
4753 auto &dxyType = getType(dxValue.type);
4754 ASSERT(dxyType.sizeInComponents == getType(dyValue.type).sizeInComponents);
4755
4756 instruction.gradComponents = dxyType.sizeInComponents;
4757
Nicolas Capens022bd572019-04-29 23:45:25 -04004758 for(uint32_t j = 0; j < dxyType.sizeInComponents; j++, i++)
Nicolas Capens420d9da2019-04-26 17:44:42 -04004759 {
4760 in[i] = dxValue.Float(j);
Nicolas Capens420d9da2019-04-26 17:44:42 -04004761 }
4762
Nicolas Capens022bd572019-04-29 23:45:25 -04004763 for(uint32_t j = 0; j < dxyType.sizeInComponents; j++, i++)
Nicolas Capens420d9da2019-04-26 17:44:42 -04004764 {
4765 in[i] = dyValue.Float(j);
Nicolas Capens022bd572019-04-29 23:45:25 -04004766 }
4767 }
4768
4769 if(constOffset)
4770 {
4771 auto offsetValue = GenericValue(this, state->routine, offsetId);
4772 auto &offsetType = getType(offsetValue.type);
4773
4774 instruction.samplerOption = Offset;
4775 instruction.offsetComponents = offsetType.sizeInComponents;
4776
4777 for(uint32_t j = 0; j < offsetType.sizeInComponents; j++, i++)
4778 {
4779 in[i] = offsetValue.Float(j); // Integer values, but transfered as float.
Nicolas Capens420d9da2019-04-26 17:44:42 -04004780 }
4781 }
4782
Chris Forbes45f9a932019-05-08 13:30:38 -07004783 auto samplerFunc = Call(getImageSampler, instruction.parameters, imageDescriptor, sampler);
Nicolas Capens7d867272019-04-08 22:51:08 -04004784
Ben Clayton96fbe082019-04-16 19:28:11 -04004785 Array<SIMD::Float> out(4);
Nicolas Capens97da7822019-04-30 17:33:26 -04004786 Call<ImageSampler>(samplerFunc, texture, sampler, &in[0], &out[0], state->routine->constants);
Nicolas Capens7d867272019-04-08 22:51:08 -04004787
Chris Forbesc71c17f2019-05-04 10:01:04 -07004788 for (auto i = 0u; i < resultType.sizeInComponents; i++) { result.move(i, out[i]); }
Nicolas Capens7d867272019-04-08 22:51:08 -04004789
4790 return EmitResult::Continue;
4791 }
4792
Chris Forbesb0d00ea2019-04-17 20:24:20 -07004793 SpirvShader::EmitResult SpirvShader::EmitImageQuerySize(InsnIterator insn, EmitState *state) const
4794 {
Ben Clayton0264d8e2019-05-08 15:39:40 +01004795 auto &resultTy = getType(Type::ID(insn.word(1)));
4796 auto resultId = Object::ID(insn.word(2));
Chris Forbesb0d00ea2019-04-17 20:24:20 -07004797 auto imageId = Object::ID(insn.word(3));
Ben Clayton0264d8e2019-05-08 15:39:40 +01004798 auto lodId = Object::ID(0);
4799
4800 auto &dst = state->routine->createIntermediate(resultId, resultTy.sizeInComponents);
4801 GetImageDimensions(state->routine, resultTy, imageId, lodId, dst);
4802
4803 return EmitResult::Continue;
4804 }
4805
4806 SpirvShader::EmitResult SpirvShader::EmitImageQuerySizeLod(InsnIterator insn, EmitState *state) const
4807 {
4808 auto &resultTy = getType(Type::ID(insn.word(1)));
4809 auto resultId = Object::ID(insn.word(2));
4810 auto imageId = Object::ID(insn.word(3));
4811 auto lodId = Object::ID(insn.word(4));
4812
4813 auto &dst = state->routine->createIntermediate(resultId, resultTy.sizeInComponents);
4814 GetImageDimensions(state->routine, resultTy, imageId, lodId, dst);
4815
4816 return EmitResult::Continue;
4817 }
4818
4819 void SpirvShader::GetImageDimensions(SpirvRoutine const *routine, Type const &resultTy, Object::ID imageId, Object::ID lodId, Intermediate &dst) const
4820 {
Chris Forbesb0d00ea2019-04-17 20:24:20 -07004821 auto &image = getObject(imageId);
4822 auto &imageType = getType(image.type);
Chris Forbesb0d00ea2019-04-17 20:24:20 -07004823
4824 ASSERT(imageType.definition.opcode() == spv::OpTypeImage);
4825 bool isArrayed = imageType.definition.word(5) != 0;
4826 bool isCubeMap = imageType.definition.word(3) == spv::DimCube;
4827
4828 const DescriptorDecorations &d = descriptorDecorations.at(imageId);
Ben Clayton0264d8e2019-05-08 15:39:40 +01004829 auto setLayout = routine->pipelineLayout->getDescriptorSetLayout(d.DescriptorSet);
Chris Forbesb0d00ea2019-04-17 20:24:20 -07004830 auto &bindingLayout = setLayout->getBindingLayout(d.Binding);
4831
Ben Clayton0264d8e2019-05-08 15:39:40 +01004832 Pointer<Byte> descriptor = routine->getPointer(imageId).base;
Chris Forbesb0d00ea2019-04-17 20:24:20 -07004833
Ben Clayton0264d8e2019-05-08 15:39:40 +01004834 Pointer<Int> extent;
4835 Int arrayLayers;
Chris Forbesb0d00ea2019-04-17 20:24:20 -07004836
4837 switch (bindingLayout.descriptorType)
4838 {
4839 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
4840 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
4841 {
Ben Clayton0264d8e2019-05-08 15:39:40 +01004842 extent = descriptor + OFFSET(vk::StorageImageDescriptor, extent); // int[3]*
4843 arrayLayers = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, arrayLayers)); // uint32_t
4844 break;
4845 }
4846 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
Chris Forbese2285022019-05-08 16:09:53 -07004847 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
4848 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
Ben Clayton0264d8e2019-05-08 15:39:40 +01004849 {
4850 extent = descriptor + OFFSET(vk::SampledImageDescriptor, extent); // int[3]*
4851 arrayLayers = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, arrayLayers)); // uint32_t
Chris Forbesb0d00ea2019-04-17 20:24:20 -07004852 break;
4853 }
4854 default:
Ben Clayton92797c22019-04-25 10:44:03 +01004855 UNREACHABLE("Image descriptorType: %d", int(bindingLayout.descriptorType));
Chris Forbesb0d00ea2019-04-17 20:24:20 -07004856 }
4857
Ben Clayton0264d8e2019-05-08 15:39:40 +01004858 auto dimensions = resultTy.sizeInComponents - (isArrayed ? 1 : 0);
4859 std::vector<Int> out;
4860 if (lodId != 0)
4861 {
4862 auto lodVal = GenericValue(this, routine, lodId);
4863 ASSERT(getType(lodVal.type).sizeInComponents == 1);
4864 auto lod = lodVal.Int(0);
4865 auto one = SIMD::Int(1);
4866 for (uint32_t i = 0; i < dimensions; i++)
4867 {
4868 dst.move(i, Max(SIMD::Int(extent[i]) >> lod, one));
4869 }
4870 }
4871 else
4872 {
4873 for (uint32_t i = 0; i < dimensions; i++)
4874 {
4875 dst.move(i, SIMD::Int(extent[i]));
4876 }
4877 }
4878
4879 if (isArrayed)
4880 {
4881 auto numElements = isCubeMap ? (arrayLayers / 6) : RValue<Int>(arrayLayers);
4882 dst.move(dimensions, SIMD::Int(numElements));
4883 }
Chris Forbesb0d00ea2019-04-17 20:24:20 -07004884 }
4885
Ben Claytonb4001ed2019-05-10 10:21:00 +01004886 SpirvShader::EmitResult SpirvShader::EmitImageQueryLevels(InsnIterator insn, EmitState *state) const
4887 {
4888 auto &resultTy = getType(Type::ID(insn.word(1)));
4889 ASSERT(resultTy.sizeInComponents == 1);
4890 auto resultId = Object::ID(insn.word(2));
4891 auto imageId = Object::ID(insn.word(3));
4892
4893 const DescriptorDecorations &d = descriptorDecorations.at(imageId);
4894 auto setLayout = state->routine->pipelineLayout->getDescriptorSetLayout(d.DescriptorSet);
4895 auto &bindingLayout = setLayout->getBindingLayout(d.Binding);
4896
4897 Pointer<Byte> descriptor = state->routine->getPointer(imageId).base;
4898 Int mipLevels = 0;
4899 switch (bindingLayout.descriptorType)
4900 {
4901 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
4902 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
4903 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
Ben Claytonb4001ed2019-05-10 10:21:00 +01004904 mipLevels = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, mipLevels)); // uint32_t
4905 break;
Ben Claytonb4001ed2019-05-10 10:21:00 +01004906 default:
4907 UNREACHABLE("Image descriptorType: %d", int(bindingLayout.descriptorType));
4908 }
4909
4910 auto &dst = state->routine->createIntermediate(resultId, 1);
4911 dst.move(0, SIMD::Int(mipLevels));
4912
4913 return EmitResult::Continue;
4914 }
4915
Ben Clayton2568cf72019-05-10 11:53:14 +01004916 SpirvShader::EmitResult SpirvShader::EmitImageQuerySamples(InsnIterator insn, EmitState *state) const
4917 {
4918 auto &resultTy = getType(Type::ID(insn.word(1)));
4919 ASSERT(resultTy.sizeInComponents == 1);
4920 auto resultId = Object::ID(insn.word(2));
4921 auto imageId = Object::ID(insn.word(3));
4922 auto imageTy = getType(getObject(imageId).type);
4923 ASSERT(imageTy.definition.opcode() == spv::OpTypeImage);
4924 ASSERT(imageTy.definition.word(3) == spv::Dim2D);
4925 ASSERT(imageTy.definition.word(6 /* MS */) == 1);
4926
4927 const DescriptorDecorations &d = descriptorDecorations.at(imageId);
4928 auto setLayout = state->routine->pipelineLayout->getDescriptorSetLayout(d.DescriptorSet);
4929 auto &bindingLayout = setLayout->getBindingLayout(d.Binding);
4930
4931 Pointer<Byte> descriptor = state->routine->getPointer(imageId).base;
4932 Int sampleCount = 0;
4933 switch (bindingLayout.descriptorType)
4934 {
4935 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
4936 sampleCount = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, sampleCount)); // uint32_t
4937 break;
4938 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
4939 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
4940 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
4941 sampleCount = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, sampleCount)); // uint32_t
4942 break;
4943 default:
4944 UNREACHABLE("Image descriptorType: %d", int(bindingLayout.descriptorType));
4945 }
4946
4947 auto &dst = state->routine->createIntermediate(resultId, 1);
4948 dst.move(0, SIMD::Int(sampleCount));
4949
4950 return EmitResult::Continue;
4951 }
4952
Chris Forbes011744e2019-05-06 14:21:45 -07004953 SIMD::Pointer SpirvShader::GetTexelAddress(SpirvRoutine const *routine, SIMD::Pointer ptr, GenericValue const & coordinate, Type const & imageType, Pointer<Byte> descriptor, int texelSize, Object::ID sampleId, bool useStencilAspect) const
Chris Forbes89c37a42019-04-17 18:28:33 -07004954 {
Chris Forbes89c37a42019-04-17 18:28:33 -07004955 bool isArrayed = imageType.definition.word(5) != 0;
Chris Forbes24466042019-04-22 10:54:23 -07004956 auto dim = static_cast<spv::Dim>(imageType.definition.word(3));
Chris Forbes89c37a42019-04-17 18:28:33 -07004957 int dims = getType(coordinate.type).sizeInComponents - (isArrayed ? 1 : 0);
4958
Chris Forbes24466042019-04-22 10:54:23 -07004959 SIMD::Int u = coordinate.Int(0);
4960 SIMD::Int v = (getType(coordinate.type).sizeInComponents > 1) ? coordinate.Int(1) : RValue<SIMD::Int>(0);
4961 if (dim == spv::DimSubpassData)
4962 {
4963 u += routine->windowSpacePosition[0];
4964 v += routine->windowSpacePosition[1];
4965 }
4966
Chris Forbes011744e2019-05-06 14:21:45 -07004967 if (useStencilAspect)
4968 {
4969 // Adjust addressing for quad layout. Pitches are already correct for the stencil aspect.
4970 // In the quad-layout block, pixel order is [x0,y0 x1,y0 x0,y1 x1,y1]
4971 u = ((v & SIMD::Int(1)) << 1) | ((u << 1) - (u & SIMD::Int(1)));
4972 v &= SIMD::Int(~1);
4973 }
4974
4975 auto rowPitch = SIMD::Int(*Pointer<Int>(descriptor + (useStencilAspect
4976 ? OFFSET(vk::StorageImageDescriptor, stencilRowPitchBytes)
4977 : OFFSET(vk::StorageImageDescriptor, rowPitchBytes))));
4978 auto slicePitch = SIMD::Int(
4979 *Pointer<Int>(descriptor + (useStencilAspect
4980 ? OFFSET(vk::StorageImageDescriptor, stencilSlicePitchBytes)
4981 : OFFSET(vk::StorageImageDescriptor, slicePitchBytes))));
4982 auto samplePitch = SIMD::Int(
4983 *Pointer<Int>(descriptor + (useStencilAspect
4984 ? OFFSET(vk::StorageImageDescriptor, stencilSamplePitchBytes)
4985 : OFFSET(vk::StorageImageDescriptor, samplePitchBytes))));
4986
Chris Forbes24466042019-04-22 10:54:23 -07004987 ptr += u * SIMD::Int(texelSize);
Chris Forbes89c37a42019-04-17 18:28:33 -07004988 if (dims > 1)
4989 {
Chris Forbes011744e2019-05-06 14:21:45 -07004990 ptr += v * rowPitch;
Chris Forbes89c37a42019-04-17 18:28:33 -07004991 }
4992 if (dims > 2)
4993 {
Chris Forbes011744e2019-05-06 14:21:45 -07004994 ptr += coordinate.Int(2) * slicePitch;
Chris Forbes89c37a42019-04-17 18:28:33 -07004995 }
4996 if (isArrayed)
4997 {
Chris Forbes011744e2019-05-06 14:21:45 -07004998 ptr += coordinate.Int(dims) * slicePitch;
Chris Forbes89c37a42019-04-17 18:28:33 -07004999 }
5000
Chris Forbes52a3bba2019-05-03 15:11:41 -07005001 if (sampleId.value())
5002 {
5003 GenericValue sample{this, routine, sampleId};
Chris Forbes011744e2019-05-06 14:21:45 -07005004 ptr += sample.Int(0) * samplePitch;
Chris Forbes52a3bba2019-05-03 15:11:41 -07005005 }
5006
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04005007 return ptr;
Chris Forbes89c37a42019-04-17 18:28:33 -07005008 }
5009
Ben Claytonecfeede2019-05-08 08:51:01 +01005010 void SpirvShader::Yield(YieldResult res) const
5011 {
5012 rr::Yield(RValue<Int>(int(res)));
5013 }
5014
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07005015 SpirvShader::EmitResult SpirvShader::EmitImageRead(InsnIterator insn, EmitState *state) const
5016 {
5017 auto &resultType = getType(Type::ID(insn.word(1)));
5018 auto imageId = Object::ID(insn.word(3));
5019 auto &image = getObject(imageId);
5020 auto &imageType = getType(image.type);
5021 Object::ID resultId = insn.word(2);
5022
Chris Forbes52a3bba2019-05-03 15:11:41 -07005023 Object::ID sampleId = 0;
5024
5025 if (insn.wordCount() > 5)
5026 {
5027 int operand = 6;
5028 auto imageOperands = insn.word(5);
5029 if (imageOperands & spv::ImageOperandsSampleMask)
5030 {
5031 sampleId = insn.word(operand++);
5032 imageOperands &= ~spv::ImageOperandsSampleMask;
5033 }
5034
5035 // Should be no remaining image operands.
5036 ASSERT(!imageOperands);
5037 }
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07005038
5039 ASSERT(imageType.definition.opcode() == spv::OpTypeImage);
Chris Forbes24466042019-04-22 10:54:23 -07005040 auto dim = static_cast<spv::Dim>(imageType.definition.word(3));
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07005041
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07005042 auto coordinate = GenericValue(this, state->routine, insn.word(4));
Chris Forbes24466042019-04-22 10:54:23 -07005043 const DescriptorDecorations &d = descriptorDecorations.at(imageId);
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07005044
Chris Forbes24466042019-04-22 10:54:23 -07005045 // For subpass data, format in the instruction is spv::ImageFormatUnknown. Get it from
5046 // the renderpass data instead. In all other cases, we can use the format in the instruction.
5047 auto vkFormat = (dim == spv::DimSubpassData)
5048 ? inputAttachmentFormats[d.InputAttachmentIndex]
5049 : SpirvFormatToVulkanFormat(static_cast<spv::ImageFormat>(imageType.definition.word(8)));
Chris Forbes011744e2019-05-06 14:21:45 -07005050
5051 // Depth+Stencil image attachments select aspect based on the Sampled Type of the
5052 // OpTypeImage. If float, then we want the depth aspect. If int, we want the stencil aspect.
5053 auto useStencilAspect = (vkFormat == VK_FORMAT_D32_SFLOAT_S8_UINT &&
5054 getType(imageType.definition.word(2)).opcode() == spv::OpTypeInt);
5055
5056 if (useStencilAspect)
5057 {
5058 vkFormat = VK_FORMAT_S8_UINT;
5059 }
5060
5061 auto pointer = state->routine->getPointer(imageId);
5062 Pointer<Byte> binding = pointer.base;
5063 Pointer<Byte> imageBase = *Pointer<Pointer<Byte>>(binding + (useStencilAspect
5064 ? OFFSET(vk::StorageImageDescriptor, stencilPtr)
5065 : OFFSET(vk::StorageImageDescriptor, ptr)));
5066
5067 auto imageSizeInBytes = *Pointer<Int>(binding + OFFSET(vk::StorageImageDescriptor, sizeInBytes));
5068
5069 auto &dst = state->routine->createIntermediate(resultId, resultType.sizeInComponents);
5070
Chris Forbes24466042019-04-22 10:54:23 -07005071 auto texelSize = vk::Format(vkFormat).bytes();
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04005072 auto basePtr = SIMD::Pointer(imageBase, imageSizeInBytes);
Chris Forbes011744e2019-05-06 14:21:45 -07005073 auto texelPtr = GetTexelAddress(state->routine, basePtr, coordinate, imageType, binding, texelSize, sampleId, useStencilAspect);
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07005074
Chris Forbes24466042019-04-22 10:54:23 -07005075 SIMD::Int packed[4];
Chris Forbesa5f4eb62019-04-22 17:46:20 -07005076 // Round up texel size: for formats smaller than 32 bits per texel, we will emit a bunch
5077 // of (overlapping) 32b loads here, and each lane will pick out what it needs from the low bits.
5078 // TODO: specialize for small formats?
5079 for (auto i = 0; i < (texelSize + 3)/4; i++)
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07005080 {
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04005081 packed[i] = SIMD::Load<SIMD::Int>(texelPtr, state->activeLaneMask());
5082 texelPtr += sizeof(float);
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07005083 }
5084
Chris Forbesa32d6302019-04-26 14:19:04 -07005085 // Format support requirements here come from two sources:
5086 // - Minimum required set of formats for loads from storage images
5087 // - Any format supported as a color or depth/stencil attachment, for input attachments
Chris Forbes24466042019-04-22 10:54:23 -07005088 switch(vkFormat)
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07005089 {
Chris Forbes24466042019-04-22 10:54:23 -07005090 case VK_FORMAT_R32G32B32A32_SFLOAT:
5091 case VK_FORMAT_R32G32B32A32_SINT:
5092 case VK_FORMAT_R32G32B32A32_UINT:
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07005093 dst.move(0, packed[0]);
5094 dst.move(1, packed[1]);
5095 dst.move(2, packed[2]);
5096 dst.move(3, packed[3]);
5097 break;
Chris Forbes24466042019-04-22 10:54:23 -07005098 case VK_FORMAT_R32_SINT:
5099 case VK_FORMAT_R32_UINT:
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07005100 dst.move(0, packed[0]);
5101 // Fill remaining channels with 0,0,1 (of the correct type)
5102 dst.move(1, SIMD::Int(0));
5103 dst.move(2, SIMD::Int(0));
5104 dst.move(3, SIMD::Int(1));
5105 break;
Chris Forbes24466042019-04-22 10:54:23 -07005106 case VK_FORMAT_R32_SFLOAT:
Chris Forbesa5f4eb62019-04-22 17:46:20 -07005107 case VK_FORMAT_D32_SFLOAT:
Chris Forbes011744e2019-05-06 14:21:45 -07005108 case VK_FORMAT_D32_SFLOAT_S8_UINT:
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07005109 dst.move(0, packed[0]);
5110 // Fill remaining channels with 0,0,1 (of the correct type)
5111 dst.move(1, SIMD::Float(0));
5112 dst.move(2, SIMD::Float(0));
5113 dst.move(3, SIMD::Float(1));
5114 break;
Chris Forbesa5f4eb62019-04-22 17:46:20 -07005115 case VK_FORMAT_D16_UNORM:
5116 dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xffff)) * SIMD::Float(1.0f / 65535.0f));
5117 dst.move(1, SIMD::Float(0));
5118 dst.move(2, SIMD::Float(0));
5119 dst.move(3, SIMD::Float(1));
5120 break;
Chris Forbes24466042019-04-22 10:54:23 -07005121 case VK_FORMAT_R16G16B16A16_SINT:
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07005122 dst.move(0, (packed[0] << 16) >> 16);
5123 dst.move(1, (packed[0]) >> 16);
5124 dst.move(2, (packed[1] << 16) >> 16);
5125 dst.move(3, (packed[1]) >> 16);
5126 break;
Chris Forbes24466042019-04-22 10:54:23 -07005127 case VK_FORMAT_R16G16B16A16_UINT:
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07005128 dst.move(0, packed[0] & SIMD::Int(0xffff));
5129 dst.move(1, (packed[0] >> 16) & SIMD::Int(0xffff));
5130 dst.move(2, packed[1] & SIMD::Int(0xffff));
5131 dst.move(3, (packed[1] >> 16) & SIMD::Int(0xffff));
5132 break;
Chris Forbes24466042019-04-22 10:54:23 -07005133 case VK_FORMAT_R16G16B16A16_SFLOAT:
Chris Forbesd3546952019-04-30 19:32:19 -07005134 dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
5135 dst.move(1, halfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16));
5136 dst.move(2, halfToFloatBits(As<SIMD::UInt>(packed[1]) & SIMD::UInt(0x0000FFFF)));
5137 dst.move(3, halfToFloatBits((As<SIMD::UInt>(packed[1]) & SIMD::UInt(0xFFFF0000)) >> 16));
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07005138 break;
Chris Forbes24466042019-04-22 10:54:23 -07005139 case VK_FORMAT_R8G8B8A8_SNORM:
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07005140 dst.move(0, Min(Max(SIMD::Float(((packed[0]<<24) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
5141 dst.move(1, Min(Max(SIMD::Float(((packed[0]<<16) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
5142 dst.move(2, Min(Max(SIMD::Float(((packed[0]<<8) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
5143 dst.move(3, Min(Max(SIMD::Float(((packed[0]) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
5144 break;
Chris Forbes24466042019-04-22 10:54:23 -07005145 case VK_FORMAT_R8G8B8A8_UNORM:
Chris Forbesa32d6302019-04-26 14:19:04 -07005146 case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07005147 dst.move(0, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5148 dst.move(1, SIMD::Float(((packed[0]>>8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5149 dst.move(2, SIMD::Float(((packed[0]>>16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5150 dst.move(3, SIMD::Float(((packed[0]>>24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5151 break;
Chris Forbesa32d6302019-04-26 14:19:04 -07005152 case VK_FORMAT_R8G8B8A8_SRGB:
5153 case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
5154 dst.move(0, ::sRGBtoLinear(SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
5155 dst.move(1, ::sRGBtoLinear(SIMD::Float(((packed[0]>>8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
5156 dst.move(2, ::sRGBtoLinear(SIMD::Float(((packed[0]>>16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
5157 dst.move(3, ::sRGBtoLinear(SIMD::Float(((packed[0]>>24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
5158 break;
5159 case VK_FORMAT_B8G8R8A8_UNORM:
5160 dst.move(0, SIMD::Float(((packed[0]>>16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5161 dst.move(1, SIMD::Float(((packed[0]>>8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5162 dst.move(2, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5163 dst.move(3, SIMD::Float(((packed[0]>>24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5164 break;
5165 case VK_FORMAT_B8G8R8A8_SRGB:
5166 dst.move(0, ::sRGBtoLinear(SIMD::Float(((packed[0]>>16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
5167 dst.move(1, ::sRGBtoLinear(SIMD::Float(((packed[0]>>8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
5168 dst.move(2, ::sRGBtoLinear(SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
5169 dst.move(3, ::sRGBtoLinear(SIMD::Float(((packed[0]>>24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
5170 break;
Chris Forbes24466042019-04-22 10:54:23 -07005171 case VK_FORMAT_R8G8B8A8_UINT:
Chris Forbesa32d6302019-04-26 14:19:04 -07005172 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07005173 dst.move(0, (As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF)));
5174 dst.move(1, ((As<SIMD::UInt>(packed[0])>>8) & SIMD::UInt(0xFF)));
5175 dst.move(2, ((As<SIMD::UInt>(packed[0])>>16) & SIMD::UInt(0xFF)));
5176 dst.move(3, ((As<SIMD::UInt>(packed[0])>>24) & SIMD::UInt(0xFF)));
5177 break;
Chris Forbes24466042019-04-22 10:54:23 -07005178 case VK_FORMAT_R8G8B8A8_SINT:
Chris Forbesa32d6302019-04-26 14:19:04 -07005179 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07005180 dst.move(0, (packed[0] << 24) >> 24);
5181 dst.move(1, (packed[0] << 16) >> 24);
5182 dst.move(2, (packed[0] << 8) >> 24);
5183 dst.move(3, (packed[0]) >> 24);
5184 break;
Chris Forbesf5c89362019-04-26 13:41:41 -07005185 case VK_FORMAT_R8_UNORM:
5186 dst.move(0, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5187 dst.move(1, SIMD::Float(0));
5188 dst.move(2, SIMD::Float(0));
5189 dst.move(3, SIMD::Float(1));
5190 break;
5191 case VK_FORMAT_R8_UINT:
Chris Forbes011744e2019-05-06 14:21:45 -07005192 case VK_FORMAT_S8_UINT:
Chris Forbesf5c89362019-04-26 13:41:41 -07005193 dst.move(0, (As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF)));
5194 dst.move(1, SIMD::UInt(0));
5195 dst.move(2, SIMD::UInt(0));
5196 dst.move(3, SIMD::UInt(1));
5197 break;
5198 case VK_FORMAT_R8_SINT:
5199 dst.move(0, (packed[0] << 24) >> 24);
5200 dst.move(1, SIMD::Int(0));
5201 dst.move(2, SIMD::Int(0));
5202 dst.move(3, SIMD::Int(1));
5203 break;
5204 case VK_FORMAT_R8G8_UNORM:
5205 dst.move(0, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5206 dst.move(1, SIMD::Float(((packed[0]>>8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5207 dst.move(2, SIMD::Float(0));
5208 dst.move(3, SIMD::Float(1));
5209 break;
5210 case VK_FORMAT_R8G8_UINT:
5211 dst.move(0, (As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF)));
5212 dst.move(1, ((As<SIMD::UInt>(packed[0])>>8) & SIMD::UInt(0xFF)));
5213 dst.move(2, SIMD::UInt(0));
5214 dst.move(3, SIMD::UInt(1));
5215 break;
5216 case VK_FORMAT_R8G8_SINT:
5217 dst.move(0, (packed[0] << 24) >> 24);
5218 dst.move(1, (packed[0] << 16) >> 24);
5219 dst.move(2, SIMD::Int(0));
5220 dst.move(3, SIMD::Int(1));
5221 break;
Chris Forbesa32d6302019-04-26 14:19:04 -07005222 case VK_FORMAT_R16_SFLOAT:
Chris Forbesd3546952019-04-30 19:32:19 -07005223 dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
Chris Forbesa32d6302019-04-26 14:19:04 -07005224 dst.move(1, SIMD::Float(0));
5225 dst.move(2, SIMD::Float(0));
5226 dst.move(3, SIMD::Float(1));
5227 break;
5228 case VK_FORMAT_R16_UINT:
5229 dst.move(0, packed[0] & SIMD::Int(0xffff));
5230 dst.move(1, SIMD::UInt(0));
5231 dst.move(2, SIMD::UInt(0));
5232 dst.move(3, SIMD::UInt(1));
5233 break;
5234 case VK_FORMAT_R16_SINT:
5235 dst.move(0, (packed[0] << 16) >> 16);
5236 dst.move(1, SIMD::Int(0));
5237 dst.move(2, SIMD::Int(0));
5238 dst.move(3, SIMD::Int(1));
5239 break;
5240 case VK_FORMAT_R16G16_SFLOAT:
Chris Forbesd3546952019-04-30 19:32:19 -07005241 dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
5242 dst.move(1, halfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16));
Chris Forbesa32d6302019-04-26 14:19:04 -07005243 dst.move(2, SIMD::Float(0));
5244 dst.move(3, SIMD::Float(1));
5245 break;
5246 case VK_FORMAT_R16G16_UINT:
5247 dst.move(0, packed[0] & SIMD::Int(0xffff));
5248 dst.move(1, (packed[0] >> 16) & SIMD::Int(0xffff));
5249 dst.move(2, SIMD::UInt(0));
5250 dst.move(3, SIMD::UInt(1));
5251 break;
5252 case VK_FORMAT_R16G16_SINT:
5253 dst.move(0, (packed[0] << 16) >> 16);
5254 dst.move(1, (packed[0]) >> 16);
5255 dst.move(2, SIMD::Int(0));
5256 dst.move(3, SIMD::Int(1));
5257 break;
5258 case VK_FORMAT_R32G32_SINT:
5259 case VK_FORMAT_R32G32_UINT:
5260 dst.move(0, packed[0]);
5261 dst.move(1, packed[1]);
5262 dst.move(2, SIMD::Int(0));
5263 dst.move(3, SIMD::Int(1));
5264 break;
5265 case VK_FORMAT_R32G32_SFLOAT:
5266 dst.move(0, packed[0]);
5267 dst.move(1, packed[1]);
5268 dst.move(2, SIMD::Float(0));
5269 dst.move(3, SIMD::Float(1));
5270 break;
Chris Forbesdcc9fd72019-05-03 07:35:14 -07005271 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
5272 dst.move(0, (packed[0]) & SIMD::Int(0x3FF));
5273 dst.move(1, (packed[0] >> 10) & SIMD::Int(0x3FF));
5274 dst.move(2, (packed[0] >> 20) & SIMD::Int(0x3FF));
5275 dst.move(3, (packed[0] >> 30) & SIMD::Int(0x3));
5276 break;
Ben Clayton3d7b7ea2019-05-14 16:49:58 +01005277 case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
5278 dst.move(0, SIMD::Float((packed[0]) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
5279 dst.move(1, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
5280 dst.move(2, SIMD::Float((packed[0] >> 20) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
5281 dst.move(3, SIMD::Float((packed[0] >> 30) & SIMD::Int(0x3)) * SIMD::Float(1.0f / 0x3));
5282 break;
Chris Forbes8aba20f2019-05-03 09:06:48 -07005283 case VK_FORMAT_R5G6B5_UNORM_PACK16:
5284 dst.move(0, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
5285 dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x3F)) * SIMD::Float(1.0f / 0x3F));
5286 dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
5287 dst.move(3, SIMD::Float(1));
5288 break;
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07005289 default:
Ben Clayton92797c22019-04-25 10:44:03 +01005290 UNIMPLEMENTED("spv::ImageFormat %d", int(vkFormat));
5291 break;
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07005292 }
5293
5294 return EmitResult::Continue;
5295 }
5296
Chris Forbes179f0142019-04-17 20:24:44 -07005297 SpirvShader::EmitResult SpirvShader::EmitImageWrite(InsnIterator insn, EmitState *state) const
5298 {
5299 auto imageId = Object::ID(insn.word(1));
5300 auto &image = getObject(imageId);
5301 auto &imageType = getType(image.type);
5302
5303 ASSERT(imageType.definition.opcode() == spv::OpTypeImage);
5304
5305 // Not handling any image operands yet.
5306 ASSERT(insn.wordCount() == 4);
5307
Chris Forbes179f0142019-04-17 20:24:44 -07005308 auto coordinate = GenericValue(this, state->routine, insn.word(2));
5309 auto texel = GenericValue(this, state->routine, insn.word(3));
5310
Chris Forbes621a7bd2019-04-19 08:28:00 -07005311 Pointer<Byte> binding = state->routine->getPointer(imageId).base;
Chris Forbes179f0142019-04-17 20:24:44 -07005312 Pointer<Byte> imageBase = *Pointer<Pointer<Byte>>(binding + OFFSET(vk::StorageImageDescriptor, ptr));
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04005313 auto imageSizeInBytes = *Pointer<Int>(binding + OFFSET(vk::StorageImageDescriptor, sizeInBytes));
Chris Forbes179f0142019-04-17 20:24:44 -07005314
5315 SIMD::Int packed[4];
5316 auto numPackedElements = 0u;
5317 int texelSize = 0;
5318 auto format = static_cast<spv::ImageFormat>(imageType.definition.word(8));
5319 switch (format)
5320 {
5321 case spv::ImageFormatRgba32f:
5322 case spv::ImageFormatRgba32i:
5323 case spv::ImageFormatRgba32ui:
5324 texelSize = 16;
5325 packed[0] = texel.Int(0);
5326 packed[1] = texel.Int(1);
5327 packed[2] = texel.Int(2);
5328 packed[3] = texel.Int(3);
5329 numPackedElements = 4;
5330 break;
5331 case spv::ImageFormatR32f:
5332 case spv::ImageFormatR32i:
5333 case spv::ImageFormatR32ui:
5334 texelSize = 4;
5335 packed[0] = texel.Int(0);
5336 numPackedElements = 1;
5337 break;
5338 case spv::ImageFormatRgba8:
5339 texelSize = 4;
5340 packed[0] = (SIMD::UInt(Round(Min(Max(texel.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
5341 ((SIMD::UInt(Round(Min(Max(texel.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
5342 ((SIMD::UInt(Round(Min(Max(texel.Float(2), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
5343 ((SIMD::UInt(Round(Min(Max(texel.Float(3), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24);
5344 numPackedElements = 1;
5345 break;
5346 case spv::ImageFormatRgba8Snorm:
5347 texelSize = 4;
5348 packed[0] = (SIMD::Int(Round(Min(Max(texel.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
5349 SIMD::Int(0xFF)) |
5350 ((SIMD::Int(Round(Min(Max(texel.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
5351 SIMD::Int(0xFF)) << 8) |
5352 ((SIMD::Int(Round(Min(Max(texel.Float(2), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
5353 SIMD::Int(0xFF)) << 16) |
5354 ((SIMD::Int(Round(Min(Max(texel.Float(3), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
5355 SIMD::Int(0xFF)) << 24);
5356 numPackedElements = 1;
5357 break;
5358 case spv::ImageFormatRgba8i:
5359 case spv::ImageFormatRgba8ui:
5360 texelSize = 4;
5361 packed[0] = (SIMD::UInt(texel.UInt(0) & SIMD::UInt(0xff))) |
5362 (SIMD::UInt(texel.UInt(1) & SIMD::UInt(0xff)) << 8) |
5363 (SIMD::UInt(texel.UInt(2) & SIMD::UInt(0xff)) << 16) |
5364 (SIMD::UInt(texel.UInt(3) & SIMD::UInt(0xff)) << 24);
5365 numPackedElements = 1;
5366 break;
5367 case spv::ImageFormatRgba16f:
5368 texelSize = 8;
5369 packed[0] = FloatToHalfBits(texel.UInt(0), false) | FloatToHalfBits(texel.UInt(1), true);
5370 packed[1] = FloatToHalfBits(texel.UInt(2), false) | FloatToHalfBits(texel.UInt(3), true);
5371 numPackedElements = 2;
5372 break;
5373 case spv::ImageFormatRgba16i:
5374 case spv::ImageFormatRgba16ui:
5375 texelSize = 8;
5376 packed[0] = SIMD::UInt(texel.UInt(0) & SIMD::UInt(0xffff)) | (SIMD::UInt(texel.UInt(1) & SIMD::UInt(0xffff)) << 16);
5377 packed[1] = SIMD::UInt(texel.UInt(2) & SIMD::UInt(0xffff)) | (SIMD::UInt(texel.UInt(3) & SIMD::UInt(0xffff)) << 16);
5378 numPackedElements = 2;
5379 break;
Ben Clayton92797c22019-04-25 10:44:03 +01005380 case spv::ImageFormatRg32f:
5381 case spv::ImageFormatRg16f:
5382 case spv::ImageFormatR11fG11fB10f:
5383 case spv::ImageFormatR16f:
5384 case spv::ImageFormatRgba16:
5385 case spv::ImageFormatRgb10A2:
5386 case spv::ImageFormatRg16:
5387 case spv::ImageFormatRg8:
5388 case spv::ImageFormatR16:
5389 case spv::ImageFormatR8:
5390 case spv::ImageFormatRgba16Snorm:
5391 case spv::ImageFormatRg16Snorm:
5392 case spv::ImageFormatRg8Snorm:
5393 case spv::ImageFormatR16Snorm:
5394 case spv::ImageFormatR8Snorm:
5395 case spv::ImageFormatRg32i:
5396 case spv::ImageFormatRg16i:
5397 case spv::ImageFormatRg8i:
5398 case spv::ImageFormatR16i:
5399 case spv::ImageFormatR8i:
5400 case spv::ImageFormatRgb10a2ui:
5401 case spv::ImageFormatRg32ui:
5402 case spv::ImageFormatRg16ui:
5403 case spv::ImageFormatRg8ui:
5404 case spv::ImageFormatR16ui:
5405 case spv::ImageFormatR8ui:
5406 UNIMPLEMENTED("spv::ImageFormat %d", int(format));
5407 break;
5408
Chris Forbes179f0142019-04-17 20:24:44 -07005409 default:
Ben Clayton92797c22019-04-25 10:44:03 +01005410 UNREACHABLE("spv::ImageFormat %d", int(format));
5411 break;
Chris Forbes179f0142019-04-17 20:24:44 -07005412 }
5413
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04005414 auto basePtr = SIMD::Pointer(imageBase, imageSizeInBytes);
Chris Forbes011744e2019-05-06 14:21:45 -07005415 auto texelPtr = GetTexelAddress(state->routine, basePtr, coordinate, imageType, binding, texelSize, 0, false);
Chris Forbes179f0142019-04-17 20:24:44 -07005416
5417 for (auto i = 0u; i < numPackedElements; i++)
5418 {
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04005419 SIMD::Store(texelPtr, packed[i], state->activeLaneMask());
5420 texelPtr += sizeof(float);
Chris Forbes179f0142019-04-17 20:24:44 -07005421 }
5422
5423 return EmitResult::Continue;
5424 }
5425
Chris Forbesb51f2c12019-04-18 11:01:30 -07005426 SpirvShader::EmitResult SpirvShader::EmitImageTexelPointer(InsnIterator insn, EmitState *state) const
5427 {
5428 auto &resultType = getType(Type::ID(insn.word(1)));
5429 auto imageId = Object::ID(insn.word(3));
5430 auto &image = getObject(imageId);
5431 // Note: OpImageTexelPointer is unusual in that the image is passed by pointer.
5432 // Look through to get the actual image type.
5433 auto &imageType = getType(getType(image.type).element);
5434 Object::ID resultId = insn.word(2);
5435
5436 ASSERT(imageType.opcode() == spv::OpTypeImage);
5437 ASSERT(resultType.storageClass == spv::StorageClassImage);
5438 ASSERT(getType(resultType.element).opcode() == spv::OpTypeInt);
5439
Chris Forbesb51f2c12019-04-18 11:01:30 -07005440 auto coordinate = GenericValue(this, state->routine, insn.word(4));
5441
Chris Forbes621a7bd2019-04-19 08:28:00 -07005442 Pointer<Byte> binding = state->routine->getPointer(imageId).base;
Chris Forbesb51f2c12019-04-18 11:01:30 -07005443 Pointer<Byte> imageBase = *Pointer<Pointer<Byte>>(binding + OFFSET(vk::StorageImageDescriptor, ptr));
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04005444 auto imageSizeInBytes = *Pointer<Int>(binding + OFFSET(vk::StorageImageDescriptor, sizeInBytes));
Chris Forbesb51f2c12019-04-18 11:01:30 -07005445
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04005446 auto basePtr = SIMD::Pointer(imageBase, imageSizeInBytes);
Chris Forbes011744e2019-05-06 14:21:45 -07005447 auto ptr = GetTexelAddress(state->routine, basePtr, coordinate, imageType, binding, sizeof(uint32_t), 0, false);
Ben Clayton5f7e9112019-04-16 11:03:40 -04005448
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04005449 state->routine->createPointer(resultId, ptr);
Chris Forbesb51f2c12019-04-18 11:01:30 -07005450
5451 return EmitResult::Continue;
5452 }
5453
Chris Forbesfa82c342019-04-26 16:42:38 -07005454 SpirvShader::EmitResult SpirvShader::EmitSampledImageCombineOrSplit(InsnIterator insn, EmitState *state) const
5455 {
5456 // Propagate the image pointer in both cases.
5457 // Consumers of OpSampledImage will look through to find the sampler pointer.
5458
5459 Object::ID resultId = insn.word(2);
5460 Object::ID imageId = insn.word(3);
5461
5462 state->routine->createPointer(resultId, state->routine->getPointer(imageId));
5463
5464 return EmitResult::Continue;
5465 }
5466
Chris Forbes17813932019-04-18 11:45:54 -07005467 SpirvShader::EmitResult SpirvShader::EmitAtomicOp(InsnIterator insn, EmitState *state) const
5468 {
5469 auto &resultType = getType(Type::ID(insn.word(1)));
5470 Object::ID resultId = insn.word(2);
5471 Object::ID semanticsId = insn.word(5);
5472 auto memorySemantics = static_cast<spv::MemorySemanticsMask>(getObject(semanticsId).constantValue[0]);
5473 auto memoryOrder = MemoryOrder(memorySemantics);
Chris Forbes707ed992019-04-18 18:17:35 -07005474 // Where no value is provided (increment/decrement) use an implicit value of 1.
5475 auto value = (insn.wordCount() == 7) ? GenericValue(this, state->routine, insn.word(6)).UInt(0) : RValue<SIMD::UInt>(1);
Chris Forbes17813932019-04-18 11:45:54 -07005476 auto &dst = state->routine->createIntermediate(resultId, resultType.sizeInComponents);
Ben Clayton5f7e9112019-04-16 11:03:40 -04005477 auto ptr = state->routine->getPointer(insn.word(3));
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04005478 auto ptrOffsets = ptr.offsets();
Chris Forbes17813932019-04-18 11:45:54 -07005479
5480 SIMD::UInt x;
5481 for (int j = 0; j < SIMD::Width; j++)
5482 {
5483 If(Extract(state->activeLaneMask(), j) != 0)
5484 {
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04005485 auto offset = Extract(ptrOffsets, j);
Chris Forbes707ed992019-04-18 18:17:35 -07005486 auto laneValue = Extract(value, j);
Chris Forbes17813932019-04-18 11:45:54 -07005487 UInt v;
5488 switch (insn.opcode())
5489 {
5490 case spv::OpAtomicIAdd:
Chris Forbes707ed992019-04-18 18:17:35 -07005491 case spv::OpAtomicIIncrement:
Ben Clayton5f7e9112019-04-16 11:03:40 -04005492 v = AddAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
Chris Forbes17813932019-04-18 11:45:54 -07005493 break;
Chris Forbes707ed992019-04-18 18:17:35 -07005494 case spv::OpAtomicISub:
5495 case spv::OpAtomicIDecrement:
5496 v = SubAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
5497 break;
Chris Forbes17813932019-04-18 11:45:54 -07005498 case spv::OpAtomicAnd:
Ben Clayton5f7e9112019-04-16 11:03:40 -04005499 v = AndAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
Chris Forbes17813932019-04-18 11:45:54 -07005500 break;
5501 case spv::OpAtomicOr:
Ben Clayton5f7e9112019-04-16 11:03:40 -04005502 v = OrAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
Chris Forbes17813932019-04-18 11:45:54 -07005503 break;
5504 case spv::OpAtomicXor:
Ben Clayton5f7e9112019-04-16 11:03:40 -04005505 v = XorAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
Chris Forbes17813932019-04-18 11:45:54 -07005506 break;
5507 case spv::OpAtomicSMin:
Ben Clayton5f7e9112019-04-16 11:03:40 -04005508 v = As<UInt>(MinAtomic(Pointer<Int>(&ptr.base[offset]), As<Int>(laneValue), memoryOrder));
Chris Forbes17813932019-04-18 11:45:54 -07005509 break;
5510 case spv::OpAtomicSMax:
Ben Clayton5f7e9112019-04-16 11:03:40 -04005511 v = As<UInt>(MaxAtomic(Pointer<Int>(&ptr.base[offset]), As<Int>(laneValue), memoryOrder));
Chris Forbes17813932019-04-18 11:45:54 -07005512 break;
5513 case spv::OpAtomicUMin:
Ben Clayton5f7e9112019-04-16 11:03:40 -04005514 v = MinAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
Chris Forbes17813932019-04-18 11:45:54 -07005515 break;
5516 case spv::OpAtomicUMax:
Ben Clayton5f7e9112019-04-16 11:03:40 -04005517 v = MaxAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
Chris Forbes17813932019-04-18 11:45:54 -07005518 break;
5519 case spv::OpAtomicExchange:
Ben Clayton5f7e9112019-04-16 11:03:40 -04005520 v = ExchangeAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
Chris Forbes17813932019-04-18 11:45:54 -07005521 break;
5522 default:
Ben Clayton92797c22019-04-25 10:44:03 +01005523 UNREACHABLE("%s", OpcodeName(insn.opcode()).c_str());
Chris Forbes17813932019-04-18 11:45:54 -07005524 break;
5525 }
5526 x = Insert(x, v, j);
5527 }
5528 }
5529
5530 dst.move(0, x);
5531 return EmitResult::Continue;
5532 }
5533
Chris Forbesa16238d2019-04-18 16:31:54 -07005534 SpirvShader::EmitResult SpirvShader::EmitAtomicCompareExchange(InsnIterator insn, EmitState *state) const
5535 {
5536 // Separate from EmitAtomicOp due to different instruction encoding
5537 auto &resultType = getType(Type::ID(insn.word(1)));
5538 Object::ID resultId = insn.word(2);
5539
5540 auto memorySemanticsEqual = static_cast<spv::MemorySemanticsMask>(getObject(insn.word(5)).constantValue[0]);
5541 auto memoryOrderEqual = MemoryOrder(memorySemanticsEqual);
5542 auto memorySemanticsUnequal = static_cast<spv::MemorySemanticsMask>(getObject(insn.word(6)).constantValue[0]);
5543 auto memoryOrderUnequal = MemoryOrder(memorySemanticsUnequal);
5544
5545 auto value = GenericValue(this, state->routine, insn.word(7));
5546 auto comparator = GenericValue(this, state->routine, insn.word(8));
5547 auto &dst = state->routine->createIntermediate(resultId, resultType.sizeInComponents);
Ben Clayton5f7e9112019-04-16 11:03:40 -04005548 auto ptr = state->routine->getPointer(insn.word(3));
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04005549 auto ptrOffsets = ptr.offsets();
Chris Forbesa16238d2019-04-18 16:31:54 -07005550
5551 SIMD::UInt x;
5552 for (int j = 0; j < SIMD::Width; j++)
5553 {
5554 If(Extract(state->activeLaneMask(), j) != 0)
5555 {
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04005556 auto offset = Extract(ptrOffsets, j);
Chris Forbesa16238d2019-04-18 16:31:54 -07005557 auto laneValue = Extract(value.UInt(0), j);
5558 auto laneComparator = Extract(comparator.UInt(0), j);
Ben Clayton5f7e9112019-04-16 11:03:40 -04005559 UInt v = CompareExchangeAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, laneComparator, memoryOrderEqual, memoryOrderUnequal);
Chris Forbesa16238d2019-04-18 16:31:54 -07005560 x = Insert(x, v, j);
5561 }
5562 }
5563
5564 dst.move(0, x);
5565 return EmitResult::Continue;
5566 }
5567
Ben Clayton78abf372019-05-09 15:11:58 +01005568 SpirvShader::EmitResult SpirvShader::EmitCopyObject(InsnIterator insn, EmitState *state) const
5569 {
5570 auto ty = getType(insn.word(1));
5571 auto &dst = state->routine->createIntermediate(insn.word(2), ty.sizeInComponents);
5572 auto src = GenericValue(this, state->routine, insn.word(3));
5573 for (uint32_t i = 0; i < ty.sizeInComponents; i++)
5574 {
5575 dst.move(i, src.Int(i));
5576 }
5577 return EmitResult::Continue;
5578 }
5579
Ben Claytonb5a45462019-04-30 19:21:29 +01005580 SpirvShader::EmitResult SpirvShader::EmitCopyMemory(InsnIterator insn, EmitState *state) const
5581 {
5582 Object::ID dstPtrId = insn.word(1);
5583 Object::ID srcPtrId = insn.word(2);
5584 auto &dstPtrTy = getType(getObject(dstPtrId).type);
5585 auto &srcPtrTy = getType(getObject(srcPtrId).type);
5586 ASSERT(dstPtrTy.element == srcPtrTy.element);
5587
5588 bool dstInterleavedByLane = IsStorageInterleavedByLane(dstPtrTy.storageClass);
5589 bool srcInterleavedByLane = IsStorageInterleavedByLane(srcPtrTy.storageClass);
Ben Clayton44741082019-05-10 11:03:14 +01005590 auto dstPtr = GetPointerToData(dstPtrId, 0, state->routine);
5591 auto srcPtr = GetPointerToData(srcPtrId, 0, state->routine);
Ben Claytonb5a45462019-04-30 19:21:29 +01005592
5593 std::unordered_map<uint32_t, uint32_t> srcOffsets;
5594
5595 VisitMemoryObject(srcPtrId, [&](uint32_t i, uint32_t srcOffset) { srcOffsets[i] = srcOffset; });
5596
5597 VisitMemoryObject(dstPtrId, [&](uint32_t i, uint32_t dstOffset)
5598 {
5599 auto it = srcOffsets.find(i);
5600 ASSERT(it != srcOffsets.end());
5601 auto srcOffset = it->second;
5602
5603 auto dst = dstPtr + dstOffset;
5604 auto src = srcPtr + srcOffset;
5605 if (dstInterleavedByLane) { dst = interleaveByLane(dst); }
5606 if (srcInterleavedByLane) { src = interleaveByLane(src); }
5607 SIMD::Store(dst, SIMD::Load<SIMD::Float>(src, state->activeLaneMask()), state->activeLaneMask());
5608 });
5609 return EmitResult::Continue;
5610 }
5611
Ben Claytonecfeede2019-05-08 08:51:01 +01005612 SpirvShader::EmitResult SpirvShader::EmitControlBarrier(InsnIterator insn, EmitState *state) const
5613 {
5614 auto executionScope = spv::Scope(GetConstScalarInt(insn.word(1)));
5615 auto semantics = spv::MemorySemanticsMask(GetConstScalarInt(insn.word(3)));
5616 // TODO: We probably want to consider the memory scope here. For now,
5617 // just always emit the full fence.
5618 Fence(semantics);
5619
5620 switch (executionScope)
5621 {
5622 case spv::ScopeWorkgroup:
5623 case spv::ScopeSubgroup:
5624 Yield(YieldResult::ControlBarrier);
5625 break;
5626 default:
5627 // See Vulkan 1.1 spec, Appendix A, Validation Rules within a Module.
5628 UNREACHABLE("Scope for execution must be limited to Workgroup or Subgroup");
5629 break;
5630 }
5631
5632 return EmitResult::Continue;
5633 }
5634
Ben Claytonb16c5862019-05-08 14:01:38 +01005635 SpirvShader::EmitResult SpirvShader::EmitMemoryBarrier(InsnIterator insn, EmitState *state) const
5636 {
5637 auto semantics = spv::MemorySemanticsMask(GetConstScalarInt(insn.word(2)));
5638 // TODO: We probably want to consider the memory scope here. For now,
5639 // just always emit the full fence.
5640 Fence(semantics);
5641 return EmitResult::Continue;
5642 }
5643
5644 void SpirvShader::Fence(spv::MemorySemanticsMask semantics) const
5645 {
5646 if (semantics == spv::MemorySemanticsMaskNone)
5647 {
5648 return; //no-op
5649 }
5650 rr::Fence(MemoryOrder(semantics));
5651 }
5652
Ben Clayton32d47972019-04-19 17:08:15 -04005653 SpirvShader::EmitResult SpirvShader::EmitGroupNonUniform(InsnIterator insn, EmitState *state) const
5654 {
5655 auto &type = getType(Type::ID(insn.word(1)));
5656 Object::ID resultId = insn.word(2);
Ben Claytonb16c5862019-05-08 14:01:38 +01005657 auto scope = spv::Scope(GetConstScalarInt(insn.word(3)));
Ben Clayton32d47972019-04-19 17:08:15 -04005658 ASSERT_MSG(scope == spv::ScopeSubgroup, "Scope for Non Uniform Group Operations must be Subgroup for Vulkan 1.1");
5659
5660 auto &dst = state->routine->createIntermediate(resultId, type.sizeInComponents);
5661
5662 switch (insn.opcode())
5663 {
5664 case spv::OpGroupNonUniformElect:
5665 {
5666 // Result is true only in the active invocation with the lowest id
5667 // in the group, otherwise result is false.
5668 SIMD::Int active = state->activeLaneMask();
5669 // TODO: Would be nice if we could write this as:
5670 // elect = active & ~(active.Oxyz | active.OOxy | active.OOOx)
5671 auto v0111 = SIMD::Int(0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
5672 auto elect = active & ~(v0111 & (active.xxyz | active.xxxy | active.xxxx));
5673 dst.move(0, elect);
5674 break;
5675 }
5676 default:
5677 UNIMPLEMENTED("EmitGroupNonUniform op: %s", OpcodeName(type.opcode()).c_str());
5678 }
5679 return EmitResult::Continue;
5680 }
5681
Ben Claytone4605da2019-05-09 16:24:01 +01005682 SpirvShader::EmitResult SpirvShader::EmitArrayLength(InsnIterator insn, EmitState *state) const
5683 {
5684 auto resultTyId = Type::ID(insn.word(1));
5685 auto resultId = Object::ID(insn.word(2));
5686 auto structPtrId = Object::ID(insn.word(3));
5687 auto arrayFieldIdx = insn.word(4);
5688
5689 auto &resultType = getType(resultTyId);
5690 ASSERT(resultType.sizeInComponents == 1);
5691 ASSERT(resultType.definition.opcode() == spv::OpTypeInt);
5692
5693 auto &structPtrTy = getType(getObject(structPtrId).type);
5694 auto &structTy = getType(structPtrTy.element);
5695 auto &arrayTy = getType(structTy.definition.word(2 + arrayFieldIdx));
5696 ASSERT(arrayTy.definition.opcode() == spv::OpTypeRuntimeArray);
5697 auto &arrayElTy = getType(arrayTy.element);
5698
5699 auto &result = state->routine->createIntermediate(resultId, 1);
5700 auto structBase = GetPointerToData(structPtrId, 0, state->routine);
5701
5702 Decorations d = {};
5703 ApplyDecorationsForIdMember(&d, structPtrTy.element, arrayFieldIdx);
5704 ASSERT(d.HasOffset);
5705
5706 auto arrayBase = structBase + d.Offset;
5707 auto arraySizeInBytes = SIMD::Int(arrayBase.limit) - arrayBase.offsets();
5708 auto arrayLength = arraySizeInBytes / SIMD::Int(arrayElTy.sizeInComponents * sizeof(float));
5709
5710 result.move(0, SIMD::Int(arrayLength));
5711
5712 return EmitResult::Continue;
5713 }
5714
Ben Claytonb16c5862019-05-08 14:01:38 +01005715 uint32_t SpirvShader::GetConstScalarInt(Object::ID id) const
Ben Clayton32d47972019-04-19 17:08:15 -04005716 {
5717 auto &scopeObj = getObject(id);
5718 ASSERT(scopeObj.kind == Object::Kind::Constant);
5719 ASSERT(getType(scopeObj.type).sizeInComponents == 1);
Ben Claytonb16c5862019-05-08 14:01:38 +01005720 return scopeObj.constantValue[0];
Ben Clayton32d47972019-04-19 17:08:15 -04005721 }
5722
Chris Forbesea81ab72019-05-14 15:20:33 -07005723 void SpirvShader::EvalSpecConstantOp(InsnIterator insn)
5724 {
5725 auto opcode = static_cast<spv::Op>(insn.word(3));
5726
5727 switch (opcode)
5728 {
5729 case spv::OpIAdd:
5730 case spv::OpISub:
5731 case spv::OpIMul:
5732 case spv::OpUDiv:
5733 case spv::OpSDiv:
5734 case spv::OpUMod:
5735 case spv::OpSMod:
5736 case spv::OpSRem:
5737 case spv::OpShiftRightLogical:
5738 case spv::OpShiftRightArithmetic:
5739 case spv::OpShiftLeftLogical:
5740 case spv::OpBitwiseOr:
5741 case spv::OpLogicalOr:
5742 case spv::OpBitwiseAnd:
5743 case spv::OpLogicalAnd:
5744 case spv::OpBitwiseXor:
5745 case spv::OpLogicalEqual:
5746 case spv::OpIEqual:
5747 case spv::OpLogicalNotEqual:
5748 case spv::OpINotEqual:
5749 case spv::OpULessThan:
5750 case spv::OpSLessThan:
5751 case spv::OpUGreaterThan:
5752 case spv::OpSGreaterThan:
5753 case spv::OpULessThanEqual:
5754 case spv::OpSLessThanEqual:
5755 case spv::OpUGreaterThanEqual:
5756 case spv::OpSGreaterThanEqual:
5757 EvalSpecConstantBinaryOp(insn);
5758 break;
5759
5760 case spv::OpSConvert:
5761 case spv::OpFConvert:
5762 case spv::OpUConvert:
5763 case spv::OpSNegate:
5764 case spv::OpNot:
5765 case spv::OpLogicalNot:
5766 case spv::OpQuantizeToF16:
5767 EvalSpecConstantUnaryOp(insn);
5768 break;
5769
5770 case spv::OpSelect:
5771 {
5772 auto &result = CreateConstant(insn);
5773 auto const &cond = getObject(insn.word(4));
5774 auto const &left = getObject(insn.word(5));
5775 auto const &right = getObject(insn.word(6));
5776
5777 for (auto i = 0u; i < getType(result.type).sizeInComponents; i++)
5778 {
5779 result.constantValue[i] = cond.constantValue[i] ? left.constantValue[i] : right.constantValue[i];
5780 }
5781 break;
5782 }
5783
5784 case spv::OpCompositeExtract:
5785 {
5786 auto &result = CreateConstant(insn);
5787 auto const &compositeObject = getObject(insn.word(4));
5788 auto firstComponent = WalkLiteralAccessChain(compositeObject.type, insn.wordCount() - 5, insn.wordPointer(5));
5789
5790 for (auto i = 0u; i < getType(result.type).sizeInComponents; i++)
5791 {
5792 result.constantValue[i] = compositeObject.constantValue[firstComponent + i];
5793 }
5794 break;
5795 }
5796
5797 case spv::OpCompositeInsert:
5798 {
5799 auto &result = CreateConstant(insn);
5800 auto const &newPart = getObject(insn.word(4));
5801 auto const &oldObject = getObject(insn.word(5));
5802 auto firstNewComponent = WalkLiteralAccessChain(result.type, insn.wordCount() - 6, insn.wordPointer(6));
5803
5804 // old components before
5805 for (auto i = 0u; i < firstNewComponent; i++)
5806 {
5807 result.constantValue[i] = oldObject.constantValue[i];
5808 }
5809 // new part
5810 for (auto i = 0u; i < getType(newPart.type).sizeInComponents; i++)
5811 {
5812 result.constantValue[firstNewComponent + i] = newPart.constantValue[i];
5813 }
5814 // old components after
5815 for (auto i = firstNewComponent + getType(newPart.type).sizeInComponents; i < getType(result.type).sizeInComponents; i++)
5816 {
5817 result.constantValue[i] = oldObject.constantValue[i];
5818 }
5819 break;
5820 }
5821
5822 case spv::OpVectorShuffle:
5823 {
5824 auto &result = CreateConstant(insn);
5825 auto const &firstHalf = getObject(insn.word(4));
5826 auto const &secondHalf = getObject(insn.word(5));
5827
5828 for (auto i = 0u; i < getType(result.type).sizeInComponents; i++)
5829 {
5830 auto selector = insn.word(6 + i);
5831 if (selector == static_cast<uint32_t>(-1))
5832 {
5833 // Undefined value, we'll use zero
5834 result.constantValue[i] = 0;
5835 }
5836 else if (selector < getType(firstHalf.type).sizeInComponents)
5837 {
5838 result.constantValue[i] = firstHalf.constantValue[selector];
5839 }
5840 else
5841 {
5842 result.constantValue[i] = secondHalf.constantValue[selector - getType(firstHalf.type).sizeInComponents];
5843 }
5844 }
5845 break;
5846 }
5847
5848 default:
5849 // Other spec constant ops are possible, but require capabilities that are
5850 // not exposed in our Vulkan implementation (eg Kernel), so we should never
5851 // get here for correct shaders.
5852 UNSUPPORTED("EvalSpecConstantOp op: %s", OpcodeName(opcode).c_str());
5853 }
5854 }
5855
5856 void SpirvShader::EvalSpecConstantUnaryOp(InsnIterator insn)
5857 {
5858 auto &result = CreateConstant(insn);
5859
5860 auto opcode = static_cast<spv::Op>(insn.word(3));
5861 auto const &lhs = getObject(insn.word(4));
5862 auto size = getType(lhs.type).sizeInComponents;
5863
5864 for (auto i = 0u; i < size; i++)
5865 {
5866 auto &v = result.constantValue[i];
5867 auto l = lhs.constantValue[i];
5868
5869 switch (opcode)
5870 {
5871 case spv::OpSConvert:
5872 case spv::OpFConvert:
5873 case spv::OpUConvert:
5874 UNREACHABLE("Not possible until we have multiple bit widths");
5875 break;
5876
5877 case spv::OpSNegate:
5878 v = -l;
5879 break;
5880 case spv::OpNot:
5881 case spv::OpLogicalNot:
5882 v = ~l;
5883 break;
5884
5885 case spv::OpQuantizeToF16:
5886 {
5887 // Can do this nicer with host code, but want to perfectly mirror the reactor code we emit.
5888 auto abs = bit_cast<float>(l & 0x7FFFFFFF);
5889 auto sign = l & 0x80000000;
5890 auto isZero = abs < 0.000061035f ? ~0u : 0u;
5891 auto isInf = abs > 65504.0f ? ~0u : 0u;
5892 auto isNaN = (abs != abs) ? ~0u : 0u;
5893 auto isInfOrNan = isInf | isNaN;
5894 v = l & 0xFFFFE000;
5895 v &= ~isZero | 0x80000000;
5896 v = sign | (isInfOrNan & 0x7F800000) | (~isInfOrNan & v);
5897 v |= isNaN & 0x400000;
5898 break;
5899 }
5900 default:
5901 UNREACHABLE("EvalSpecConstantUnaryOp op: %s", OpcodeName(opcode).c_str());
5902 }
5903 }
5904 }
5905
5906 void SpirvShader::EvalSpecConstantBinaryOp(InsnIterator insn)
5907 {
5908 auto &result = CreateConstant(insn);
5909
5910 auto opcode = static_cast<spv::Op>(insn.word(3));
5911 auto const &lhs = getObject(insn.word(4));
5912 auto const &rhs = getObject(insn.word(5));
5913 auto size = getType(lhs.type).sizeInComponents;
5914
5915 for (auto i = 0u; i < size; i++)
5916 {
5917 auto &v = result.constantValue[i];
5918 auto l = lhs.constantValue[i];
5919 auto r = rhs.constantValue[i];
5920
5921 switch (opcode)
5922 {
5923 case spv::OpIAdd:
5924 v = l + r;
5925 break;
5926 case spv::OpISub:
5927 v = l - r;
5928 break;
5929 case spv::OpIMul:
5930 v = l * r;
5931 break;
5932 case spv::OpUDiv:
5933 v = (r == 0) ? 0 : l / r;
5934 break;
5935 case spv::OpUMod:
5936 v = (r == 0) ? 0 : l % r;
5937 break;
5938 case spv::OpSDiv:
5939 if (r == 0) r = UINT32_MAX;
5940 if (l == static_cast<uint32_t>(INT32_MIN)) l = UINT32_MAX;
5941 v = static_cast<int32_t>(l) / static_cast<int32_t>(r);
5942 break;
5943 case spv::OpSRem:
5944 if (r == 0) r = UINT32_MAX;
5945 if (l == static_cast<uint32_t>(INT32_MIN)) l = UINT32_MAX;
5946 v = static_cast<int32_t>(l) % static_cast<int32_t>(r);
5947 break;
5948 case spv::OpSMod:
5949 if (r == 0) r = UINT32_MAX;
5950 if (l == static_cast<uint32_t>(INT32_MIN)) l = UINT32_MAX;
5951 if (l * r < 0)
5952 v = static_cast<int32_t>(l) % static_cast<int32_t>(r) + r;
5953 else
5954 v = static_cast<int32_t>(l) % static_cast<int32_t>(r);
5955 break;
5956 case spv::OpShiftRightLogical:
5957 v = l >> r;
5958 break;
5959 case spv::OpShiftRightArithmetic:
5960 v = static_cast<int32_t>(l) >> r;
5961 break;
5962 case spv::OpShiftLeftLogical:
5963 v = l << r;
5964 break;
5965 case spv::OpBitwiseOr:
5966 case spv::OpLogicalOr:
5967 v = l | r;
5968 break;
5969 case spv::OpBitwiseAnd:
5970 case spv::OpLogicalAnd:
5971 v = l & r;
5972 break;
5973 case spv::OpBitwiseXor:
5974 v = l ^ r;
5975 break;
5976 case spv::OpLogicalEqual:
5977 case spv::OpIEqual:
5978 v = (l == r) ? ~0u : 0u;
5979 break;
5980 case spv::OpLogicalNotEqual:
5981 case spv::OpINotEqual:
5982 v = (l != r) ? ~0u : 0u;
5983 break;
5984 case spv::OpULessThan:
5985 v = l < r ? ~0u : 0u;
5986 break;
5987 case spv::OpSLessThan:
5988 v = static_cast<int32_t>(l) < static_cast<int32_t>(r) ? ~0u : 0u;
5989 break;
5990 case spv::OpUGreaterThan:
5991 v = l > r ? ~0u : 0u;
5992 break;
5993 case spv::OpSGreaterThan:
5994 v = static_cast<int32_t>(l) > static_cast<int32_t>(r) ? ~0u : 0u;
5995 break;
5996 case spv::OpULessThanEqual:
5997 v = l <= r ? ~0u : 0u;
5998 break;
5999 case spv::OpSLessThanEqual:
6000 v = static_cast<int32_t>(l) <= static_cast<int32_t>(r) ? ~0u : 0u;
6001 break;
6002 case spv::OpUGreaterThanEqual:
6003 v = l >= r ? ~0u : 0u;
6004 break;
6005 case spv::OpSGreaterThanEqual:
6006 v = static_cast<int32_t>(l) >= static_cast<int32_t>(r) ? ~0u : 0u;
6007 break;
6008 default:
6009 UNREACHABLE("EvalSpecConstantBinaryOp op: %s", OpcodeName(opcode).c_str());
6010 }
6011 }
6012 }
6013
Chris Forbesc61271e2019-02-19 17:01:28 -08006014 void SpirvShader::emitEpilog(SpirvRoutine *routine) const
6015 {
6016 for (auto insn : *this)
6017 {
6018 switch (insn.opcode())
6019 {
6020 case spv::OpVariable:
6021 {
Ben Claytonaf973b62019-03-13 18:19:20 +00006022 Object::ID resultId = insn.word(2);
Chris Forbesc61271e2019-02-19 17:01:28 -08006023 auto &object = getObject(resultId);
Ben Clayton9a162482019-02-25 11:54:43 +00006024 auto &objectTy = getType(object.type);
6025 if (object.kind == Object::Kind::InterfaceVariable && objectTy.storageClass == spv::StorageClassOutput)
Chris Forbesc61271e2019-02-19 17:01:28 -08006026 {
Ben Clayton47747612019-04-04 16:27:35 +01006027 auto &dst = routine->getVariable(resultId);
Chris Forbesc61271e2019-02-19 17:01:28 -08006028 int offset = 0;
6029 VisitInterface(resultId,
6030 [&](Decorations const &d, AttribType type) {
6031 auto scalarSlot = d.Location << 2 | d.Component;
6032 routine->outputs[scalarSlot] = dst[offset++];
6033 });
6034 }
6035 break;
6036 }
6037 default:
6038 break;
6039 }
6040 }
6041 }
Ben Clayton76e9bc02019-02-26 15:02:18 +00006042
Ben Clayton64f78f52019-03-21 17:21:06 +00006043 SpirvShader::Block::Block(InsnIterator begin, InsnIterator end) : begin_(begin), end_(end)
6044 {
6045 // Default to a Simple, this may change later.
6046 kind = Block::Simple;
6047
6048 // Walk the instructions to find the last two of the block.
6049 InsnIterator insns[2];
6050 for (auto insn : *this)
6051 {
6052 insns[0] = insns[1];
6053 insns[1] = insn;
6054 }
6055
6056 switch (insns[1].opcode())
6057 {
6058 case spv::OpBranch:
6059 branchInstruction = insns[1];
6060 outs.emplace(Block::ID(branchInstruction.word(1)));
6061
6062 switch (insns[0].opcode())
6063 {
6064 case spv::OpLoopMerge:
6065 kind = Loop;
6066 mergeInstruction = insns[0];
6067 mergeBlock = Block::ID(mergeInstruction.word(1));
6068 continueTarget = Block::ID(mergeInstruction.word(2));
6069 break;
6070
6071 default:
6072 kind = Block::Simple;
6073 break;
6074 }
6075 break;
6076
6077 case spv::OpBranchConditional:
6078 branchInstruction = insns[1];
6079 outs.emplace(Block::ID(branchInstruction.word(2)));
6080 outs.emplace(Block::ID(branchInstruction.word(3)));
6081
6082 switch (insns[0].opcode())
6083 {
6084 case spv::OpSelectionMerge:
6085 kind = StructuredBranchConditional;
6086 mergeInstruction = insns[0];
6087 mergeBlock = Block::ID(mergeInstruction.word(1));
6088 break;
6089
6090 case spv::OpLoopMerge:
6091 kind = Loop;
6092 mergeInstruction = insns[0];
6093 mergeBlock = Block::ID(mergeInstruction.word(1));
6094 continueTarget = Block::ID(mergeInstruction.word(2));
6095 break;
6096
6097 default:
6098 kind = UnstructuredBranchConditional;
6099 break;
6100 }
6101 break;
6102
6103 case spv::OpSwitch:
6104 branchInstruction = insns[1];
6105 outs.emplace(Block::ID(branchInstruction.word(2)));
6106 for (uint32_t w = 4; w < branchInstruction.wordCount(); w += 2)
6107 {
6108 outs.emplace(Block::ID(branchInstruction.word(w)));
6109 }
6110
6111 switch (insns[0].opcode())
6112 {
6113 case spv::OpSelectionMerge:
6114 kind = StructuredSwitch;
6115 mergeInstruction = insns[0];
6116 mergeBlock = Block::ID(mergeInstruction.word(1));
6117 break;
6118
6119 default:
6120 kind = UnstructuredSwitch;
6121 break;
6122 }
6123 break;
6124
6125 default:
6126 break;
6127 }
6128 }
Ben Claytonc0cf68b2019-03-21 17:46:08 +00006129
Ben Clayton513ed1d2019-03-28 16:07:00 +00006130 bool SpirvShader::existsPath(Block::ID from, Block::ID to, Block::ID notPassingThrough) const
Ben Claytone747b3c2019-03-21 19:35:15 +00006131 {
6132 // TODO: Optimize: This can be cached on the block.
6133 Block::Set seen;
Ben Clayton513ed1d2019-03-28 16:07:00 +00006134 seen.emplace(notPassingThrough);
Ben Claytone747b3c2019-03-21 19:35:15 +00006135
6136 std::queue<Block::ID> pending;
6137 pending.emplace(from);
6138
6139 while (pending.size() > 0)
6140 {
6141 auto id = pending.front();
6142 pending.pop();
6143 for (auto out : getBlock(id).outs)
6144 {
6145 if (seen.count(out) != 0) { continue; }
6146 if (out == to) { return true; }
6147 pending.emplace(out);
6148 }
6149 seen.emplace(id);
6150 }
6151
6152 return false;
6153 }
6154
Ben Claytonc0cf68b2019-03-21 17:46:08 +00006155 void SpirvShader::EmitState::addOutputActiveLaneMaskEdge(Block::ID to, RValue<SIMD::Int> mask)
6156 {
6157 addActiveLaneMaskEdge(currentBlock, to, mask & activeLaneMask());
6158 }
6159
6160 void SpirvShader::EmitState::addActiveLaneMaskEdge(Block::ID from, Block::ID to, RValue<SIMD::Int> mask)
6161 {
6162 auto edge = Block::Edge{from, to};
6163 auto it = edgeActiveLaneMasks.find(edge);
6164 if (it == edgeActiveLaneMasks.end())
6165 {
6166 edgeActiveLaneMasks.emplace(edge, mask);
6167 }
6168 else
6169 {
6170 auto combined = it->second | mask;
6171 edgeActiveLaneMasks.erase(edge);
6172 edgeActiveLaneMasks.emplace(edge, combined);
6173 }
6174 }
6175
Ben Claytonfe3f0132019-03-26 11:10:16 +00006176 RValue<SIMD::Int> SpirvShader::GetActiveLaneMaskEdge(EmitState *state, Block::ID from, Block::ID to) const
Ben Claytonc0cf68b2019-03-21 17:46:08 +00006177 {
6178 auto edge = Block::Edge{from, to};
Ben Claytonfe3f0132019-03-26 11:10:16 +00006179 auto it = state->edgeActiveLaneMasks.find(edge);
6180 ASSERT_MSG(it != state->edgeActiveLaneMasks.end(), "Could not find edge %d -> %d", from.value(), to.value());
Ben Claytonc0cf68b2019-03-21 17:46:08 +00006181 return it->second;
6182 }
6183
Ben Clayton60f15ec2019-05-09 17:50:01 +01006184 VkShaderStageFlagBits SpirvShader::executionModelToStage(spv::ExecutionModel model)
6185 {
6186 switch (model)
6187 {
6188 case spv::ExecutionModelVertex: return VK_SHADER_STAGE_VERTEX_BIT;
6189 // case spv::ExecutionModelTessellationControl: return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
6190 // case spv::ExecutionModelTessellationEvaluation: return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
6191 // case spv::ExecutionModelGeometry: return VK_SHADER_STAGE_GEOMETRY_BIT;
6192 case spv::ExecutionModelFragment: return VK_SHADER_STAGE_FRAGMENT_BIT;
6193 case spv::ExecutionModelGLCompute: return VK_SHADER_STAGE_COMPUTE_BIT;
6194 // case spv::ExecutionModelKernel: return VkShaderStageFlagBits(0); // Not supported by vulkan.
6195 // case spv::ExecutionModelTaskNV: return VK_SHADER_STAGE_TASK_BIT_NV;
6196 // case spv::ExecutionModelMeshNV: return VK_SHADER_STAGE_MESH_BIT_NV;
6197 // case spv::ExecutionModelRayGenerationNV: return VK_SHADER_STAGE_RAYGEN_BIT_NV;
6198 // case spv::ExecutionModelIntersectionNV: return VK_SHADER_STAGE_INTERSECTION_BIT_NV;
6199 // case spv::ExecutionModelAnyHitNV: return VK_SHADER_STAGE_ANY_HIT_BIT_NV;
6200 // case spv::ExecutionModelClosestHitNV: return VK_SHADER_STAGE_CLOSEST_HIT_BIT_NV;
6201 // case spv::ExecutionModelMissNV: return VK_SHADER_STAGE_MISS_BIT_NV;
6202 // case spv::ExecutionModelCallableNV: return VK_SHADER_STAGE_CALLABLE_BIT_NV;
6203 default:
6204 UNSUPPORTED("ExecutionModel: %d", int(model));
6205 return VkShaderStageFlagBits(0);
6206 }
6207 }
6208
Ben Clayton76e9bc02019-02-26 15:02:18 +00006209 SpirvRoutine::SpirvRoutine(vk::PipelineLayout const *pipelineLayout) :
6210 pipelineLayout(pipelineLayout)
6211 {
6212 }
6213
Chris Forbesc25b8072018-12-10 15:10:39 -08006214}