blob: 3999d3d34541d81fefa32aa3825b8ec3769c92f8 [file] [log] [blame]
Chris Forbesaf4ed532018-12-06 18:33:27 -08001// Copyright 2018 The SwiftShader Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
Chris Forbesaf4ed532018-12-06 18:33:27 -080015#include "SpirvShader.hpp"
Nicolas Capens7d867272019-04-08 22:51:08 -040016#include "SamplerCore.hpp"
Ben Claytonecfeede2019-05-08 08:51:01 +010017
18#include "Reactor/Coroutine.hpp"
Chris Forbesaf4ed532018-12-06 18:33:27 -080019#include "System/Math.hpp"
Ben Claytonefec1b92019-03-05 17:38:16 +000020#include "Vulkan/VkBuffer.hpp"
Chris Forbes58228822019-04-17 12:51:29 -070021#include "Vulkan/VkBufferView.hpp"
Chris Forbesebe5f7f2019-01-16 10:38:34 -080022#include "Vulkan/VkDebug.hpp"
Ben Clayton225a1302019-04-02 12:28:22 +010023#include "Vulkan/VkDescriptorSet.hpp"
Ben Clayton76e9bc02019-02-26 15:02:18 +000024#include "Vulkan/VkPipelineLayout.hpp"
Nicolas Capens09591b82019-04-08 22:51:08 -040025#include "Vulkan/VkDescriptorSetLayout.hpp"
Chris Forbes24466042019-04-22 10:54:23 -070026#include "Vulkan/VkRenderPass.hpp"
Chris Forbesaf4ed532018-12-06 18:33:27 -080027#include "Device/Config.hpp"
28
Nicolas Capens82eb22e2019-04-10 01:15:43 -040029#include <spirv/unified1/spirv.hpp>
30#include <spirv/unified1/GLSL.std.450.h>
31
// b/127920555: headers included above may leave preprocessor macros named
// Bool and None defined, which would clash with identifiers used below.
// #undef is a no-op for a name that is not currently a macro, so both are
// removed unconditionally instead of only removing None when Bool is defined.
#undef Bool
#undef None
36
Ben Claytone747b3c2019-03-21 19:35:15 +000037namespace
38{
// Single-precision approximation of pi. The extra digits are beyond float
// precision and round to the same value as 3.141592653589793f.
constexpr float PI = 3.14159265358979323846f;
40
Ben Claytone747b3c2019-03-21 19:35:15 +000041 rr::RValue<rr::Bool> AnyTrue(rr::RValue<sw::SIMD::Int> const &ints)
42 {
43 return rr::SignMask(ints) != 0;
44 }
45
46 rr::RValue<rr::Bool> AnyFalse(rr::RValue<sw::SIMD::Int> const &ints)
47 {
48 return rr::SignMask(~ints) != 0;
49 }
Ben Claytond86db952019-04-08 13:43:11 -040050
51 // Returns 1 << bits.
52 // If the resulting bit overflows a 32 bit integer, 0 is returned.
53 rr::RValue<sw::SIMD::UInt> NthBit32(rr::RValue<sw::SIMD::UInt> const &bits)
54 {
55 return ((sw::SIMD::UInt(1) << bits) & rr::CmpLT(bits, sw::SIMD::UInt(32)));
56 }
57
58 // Returns bitCount number of of 1's starting from the LSB.
59 rr::RValue<sw::SIMD::UInt> Bitmask32(rr::RValue<sw::SIMD::UInt> const &bitCount)
60 {
61 return NthBit32(bitCount) - sw::SIMD::UInt(1);
62 }
Ben Clayton6caf8212019-04-09 11:28:39 -040063
64 // Performs a fused-multiply add, returning a * b + c.
65 rr::RValue<sw::SIMD::Float> FMA(
66 rr::RValue<sw::SIMD::Float> const &a,
67 rr::RValue<sw::SIMD::Float> const &b,
68 rr::RValue<sw::SIMD::Float> const &c)
69 {
70 return a * b + c;
71 }
Ben Clayton20f6ba82019-04-09 12:07:29 -040072
73 // Returns the exponent of the floating point number f.
74 // Assumes IEEE 754
75 rr::RValue<sw::SIMD::Int> Exponent(rr::RValue<sw::SIMD::Float> f)
76 {
77 auto v = rr::As<sw::SIMD::UInt>(f);
78 return (sw::SIMD::Int((v >> sw::SIMD::UInt(23)) & sw::SIMD::UInt(0xFF)) - sw::SIMD::Int(126));
79 }
Ben Claytonee10bcf2019-04-09 17:01:01 -040080
81 // Returns y if y < x; otherwise result is x.
82 // If one operand is a NaN, the other operand is the result.
83 // If both operands are NaN, the result is a NaN.
84 rr::RValue<sw::SIMD::Float> NMin(rr::RValue<sw::SIMD::Float> const &x, rr::RValue<sw::SIMD::Float> const &y)
85 {
86 using namespace rr;
87 auto xIsNan = IsNan(x);
88 auto yIsNan = IsNan(y);
89 return As<sw::SIMD::Float>(
90 // If neither are NaN, return min
91 ((~xIsNan & ~yIsNan) & As<sw::SIMD::Int>(Min(x, y))) |
92 // If one operand is a NaN, the other operand is the result
93 // If both operands are NaN, the result is a NaN.
94 ((~xIsNan & yIsNan) & As<sw::SIMD::Int>(x)) |
95 (( xIsNan ) & As<sw::SIMD::Int>(y)));
96 }
Ben Clayton02de7e02019-04-09 17:01:26 -040097
98 // Returns y if y > x; otherwise result is x.
99 // If one operand is a NaN, the other operand is the result.
100 // If both operands are NaN, the result is a NaN.
101 rr::RValue<sw::SIMD::Float> NMax(rr::RValue<sw::SIMD::Float> const &x, rr::RValue<sw::SIMD::Float> const &y)
102 {
103 using namespace rr;
104 auto xIsNan = IsNan(x);
105 auto yIsNan = IsNan(y);
106 return As<sw::SIMD::Float>(
107 // If neither are NaN, return max
108 ((~xIsNan & ~yIsNan) & As<sw::SIMD::Int>(Max(x, y))) |
109 // If one operand is a NaN, the other operand is the result
110 // If both operands are NaN, the result is a NaN.
111 ((~xIsNan & yIsNan) & As<sw::SIMD::Int>(x)) |
112 (( xIsNan ) & As<sw::SIMD::Int>(y)));
113 }
Ben Clayton1fb633c2019-04-09 17:24:59 -0400114
115 // Returns the determinant of a 2x2 matrix.
116 rr::RValue<sw::SIMD::Float> Determinant(
117 rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b,
118 rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d)
119 {
120 return a*d - b*c;
121 }
122
123 // Returns the determinant of a 3x3 matrix.
124 rr::RValue<sw::SIMD::Float> Determinant(
125 rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c,
126 rr::RValue<sw::SIMD::Float> const &d, rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f,
127 rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, rr::RValue<sw::SIMD::Float> const &i)
128 {
129 return a*e*i + b*f*g + c*d*h - c*e*g - b*d*i - a*f*h;
130 }
131
132 // Returns the determinant of a 4x4 matrix.
133 rr::RValue<sw::SIMD::Float> Determinant(
134 rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d,
135 rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h,
136 rr::RValue<sw::SIMD::Float> const &i, rr::RValue<sw::SIMD::Float> const &j, rr::RValue<sw::SIMD::Float> const &k, rr::RValue<sw::SIMD::Float> const &l,
137 rr::RValue<sw::SIMD::Float> const &m, rr::RValue<sw::SIMD::Float> const &n, rr::RValue<sw::SIMD::Float> const &o, rr::RValue<sw::SIMD::Float> const &p)
138 {
139 return a * Determinant(f, g, h,
140 j, k, l,
141 n, o, p) -
142 b * Determinant(e, g, h,
143 i, k, l,
144 m, o, p) +
145 c * Determinant(e, f, h,
146 i, j, l,
147 m, n, p) -
148 d * Determinant(e, f, g,
149 i, j, k,
150 m, n, o);
151 }
Ben Clayton445a44a2019-04-10 16:37:19 -0400152
153 // Returns the inverse of a 2x2 matrix.
154 std::array<rr::RValue<sw::SIMD::Float>, 4> MatrixInverse(
155 rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b,
156 rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d)
157 {
158 auto s = sw::SIMD::Float(1.0f) / Determinant(a, b, c, d);
159 return {{s*d, -s*b, -s*c, s*a}};
160 }
161
162 // Returns the inverse of a 3x3 matrix.
163 std::array<rr::RValue<sw::SIMD::Float>, 9> MatrixInverse(
164 rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c,
165 rr::RValue<sw::SIMD::Float> const &d, rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f,
166 rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, rr::RValue<sw::SIMD::Float> const &i)
167 {
168 auto s = sw::SIMD::Float(1.0f) / Determinant(
169 a, b, c,
170 d, e, f,
171 g, h, i); // TODO: duplicate arithmetic calculating the det and below.
172
173 return {{
174 s * (e*i - f*h), s * (c*h - b*i), s * (b*f - c*e),
175 s * (f*g - d*i), s * (a*i - c*g), s * (c*d - a*f),
176 s * (d*h - e*g), s * (b*g - a*h), s * (a*e - b*d),
177 }};
178 }
179
180 // Returns the inverse of a 4x4 matrix.
181 std::array<rr::RValue<sw::SIMD::Float>, 16> MatrixInverse(
182 rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d,
183 rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h,
184 rr::RValue<sw::SIMD::Float> const &i, rr::RValue<sw::SIMD::Float> const &j, rr::RValue<sw::SIMD::Float> const &k, rr::RValue<sw::SIMD::Float> const &l,
185 rr::RValue<sw::SIMD::Float> const &m, rr::RValue<sw::SIMD::Float> const &n, rr::RValue<sw::SIMD::Float> const &o, rr::RValue<sw::SIMD::Float> const &p)
186 {
187 auto s = sw::SIMD::Float(1.0f) / Determinant(
188 a, b, c, d,
189 e, f, g, h,
190 i, j, k, l,
191 m, n, o, p); // TODO: duplicate arithmetic calculating the det and below.
192
193 auto kplo = k*p - l*o, jpln = j*p - l*n, jokn = j*o - k*n;
194 auto gpho = g*p - h*o, fphn = f*p - h*n, fogn = f*o - g*n;
195 auto glhk = g*l - h*k, flhj = f*l - h*j, fkgj = f*k - g*j;
196 auto iplm = i*p - l*m, iokm = i*o - k*m, ephm = e*p - h*m;
197 auto eogm = e*o - g*m, elhi = e*l - h*i, ekgi = e*k - g*i;
198 auto injm = i*n - j*m, enfm = e*n - f*m, ejfi = e*j - f*i;
199
200 return {{
201 s * ( f * kplo - g * jpln + h * jokn),
202 s * (-b * kplo + c * jpln - d * jokn),
203 s * ( b * gpho - c * fphn + d * fogn),
204 s * (-b * glhk + c * flhj - d * fkgj),
205
206 s * (-e * kplo + g * iplm - h * iokm),
207 s * ( a * kplo - c * iplm + d * iokm),
208 s * (-a * gpho + c * ephm - d * eogm),
209 s * ( a * glhk - c * elhi + d * ekgi),
210
211 s * ( e * jpln - f * iplm + h * injm),
212 s * (-a * jpln + b * iplm - d * injm),
213 s * ( a * fphn - b * ephm + d * enfm),
214 s * (-a * flhj + b * elhi - d * ejfi),
215
216 s * (-e * jokn + f * iokm - g * injm),
217 s * ( a * jokn - b * iokm + c * injm),
218 s * (-a * fogn + b * eogm - c * enfm),
219 s * ( a * fkgj - b * ekgi + c * ejfi),
220 }};
221 }
Ben Clayton9e4bc1b2019-04-16 16:52:02 -0400222
Chris Forbes24466042019-04-22 10:54:23 -0700223
Ben Clayton9e4bc1b2019-04-16 16:52:02 -0400224 sw::SIMD::Pointer interleaveByLane(sw::SIMD::Pointer p)
225 {
226 p *= sw::SIMD::Width;
227 p.staticOffsets[0] += 0 * sizeof(float);
228 p.staticOffsets[1] += 1 * sizeof(float);
229 p.staticOffsets[2] += 2 * sizeof(float);
230 p.staticOffsets[3] += 3 * sizeof(float);
231 return p;
232 }
233
Chris Forbes24466042019-04-22 10:54:23 -0700234 VkFormat SpirvFormatToVulkanFormat(spv::ImageFormat format)
235 {
236 switch (format)
237 {
238 case spv::ImageFormatRgba32f: return VK_FORMAT_R32G32B32A32_SFLOAT;
239 case spv::ImageFormatRgba32i: return VK_FORMAT_R32G32B32A32_SINT;
240 case spv::ImageFormatRgba32ui: return VK_FORMAT_R32G32B32A32_UINT;
241 case spv::ImageFormatR32f: return VK_FORMAT_R32_SFLOAT;
242 case spv::ImageFormatR32i: return VK_FORMAT_R32_SINT;
243 case spv::ImageFormatR32ui: return VK_FORMAT_R32_UINT;
244 case spv::ImageFormatRgba8: return VK_FORMAT_R8G8B8A8_UNORM;
245 case spv::ImageFormatRgba8Snorm: return VK_FORMAT_R8G8B8A8_SNORM;
246 case spv::ImageFormatRgba8i: return VK_FORMAT_R8G8B8A8_SINT;
247 case spv::ImageFormatRgba8ui: return VK_FORMAT_R8G8B8A8_UINT;
248 case spv::ImageFormatRgba16f: return VK_FORMAT_R16G16B16A16_SFLOAT;
249 case spv::ImageFormatRgba16i: return VK_FORMAT_R16G16B16A16_SINT;
250 case spv::ImageFormatRgba16ui: return VK_FORMAT_R16G16B16A16_UINT;
251
252 default:
253 UNIMPLEMENTED("SPIR-V ImageFormat %u", format);
254 return VK_FORMAT_UNDEFINED;
255 }
256 }
257
Chris Forbesa32d6302019-04-26 14:19:04 -0700258 sw::SIMD::Float sRGBtoLinear(sw::SIMD::Float c)
259 {
260 sw::SIMD::Float lc = c * sw::SIMD::Float(1.0f / 12.92f);
261 sw::SIMD::Float ec = sw::power((c + sw::SIMD::Float(0.055f)) * sw::SIMD::Float(1.0f / 1.055f), sw::SIMD::Float(2.4f));
262
263 sw::SIMD::Int linear = CmpLT(c, sw::SIMD::Float(0.04045f));
264
265 sw::SIMD::Float s = c;
266 s.xyz = rr::As<sw::SIMD::Float>((linear & rr::As<sw::SIMD::Int>(lc)) | (~linear & rr::As<sw::SIMD::Int>(ec))); // FIXME: IfThenElse()
267
268 return s;
269 }
270
Ben Clayton9e4bc1b2019-04-16 16:52:02 -0400271} // anonymous namespace
Ben Claytone747b3c2019-03-21 19:35:15 +0000272
Chris Forbesaf4ed532018-12-06 18:33:27 -0800273namespace sw
274{
Ben Clayton9e4bc1b2019-04-16 16:52:02 -0400275 namespace SIMD
276 {
277
278 template<typename T>
279 T Load(Pointer ptr, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */)
280 {
281 using EL = typename Element<T>::type;
Ben Clayton9e4bc1b2019-04-16 16:52:02 -0400282 auto offsets = ptr.offsets();
283 mask &= CmpLT(offsets + SIMD::Int(sizeof(float) - 1), SIMD::Int(ptr.limit)); // Disable OOB reads.
Ben Clayton0fc611f2019-04-18 11:23:27 -0400284 if (!atomic && order == std::memory_order_relaxed)
Ben Clayton9e4bc1b2019-04-16 16:52:02 -0400285 {
Ben Clayton0fc611f2019-04-18 11:23:27 -0400286 return rr::Gather(rr::Pointer<EL>(ptr.base), offsets, mask, sizeof(float));
Ben Clayton9e4bc1b2019-04-16 16:52:02 -0400287 }
Ben Clayton0fc611f2019-04-18 11:23:27 -0400288 else
Ben Clayton9e4bc1b2019-04-16 16:52:02 -0400289 {
Ben Clayton0fc611f2019-04-18 11:23:27 -0400290 T out;
291 auto anyLanesDisabled = AnyFalse(mask);
292 If(ptr.hasEqualOffsets() && !anyLanesDisabled)
Ben Clayton9e4bc1b2019-04-16 16:52:02 -0400293 {
Ben Clayton0fc611f2019-04-18 11:23:27 -0400294 // Load one, replicate.
295 auto offset = Extract(offsets, 0);
296 out = T(rr::Load(rr::Pointer<EL>(&ptr.base[offset]), sizeof(float), atomic, order));
297 }
298 Else If(ptr.hasSequentialOffsets() && !anyLanesDisabled)
299 {
300 // Load all elements in a single SIMD instruction.
301 auto offset = Extract(offsets, 0);
302 out = rr::Load(rr::Pointer<T>(&ptr.base[offset]), sizeof(float), atomic, order);
303 }
304 Else
305 {
306 // Divergent offsets or masked lanes.
307 out = T(0);
308 for (int i = 0; i < SIMD::Width; i++)
Ben Clayton9e4bc1b2019-04-16 16:52:02 -0400309 {
Ben Clayton0fc611f2019-04-18 11:23:27 -0400310 If(Extract(mask, i) != 0)
311 {
312 auto offset = Extract(offsets, i);
313 auto el = rr::Load(rr::Pointer<EL>(&ptr.base[offset]), sizeof(float), atomic, order);
314 out = Insert(out, el, i);
315 }
Ben Clayton9e4bc1b2019-04-16 16:52:02 -0400316 }
317 }
Ben Clayton0fc611f2019-04-18 11:23:27 -0400318 return out;
Ben Clayton9e4bc1b2019-04-16 16:52:02 -0400319 }
Ben Clayton9e4bc1b2019-04-16 16:52:02 -0400320 }
321
322 template<typename T>
323 void Store(Pointer ptr, T val, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */)
324 {
325 using EL = typename Element<T>::type;
326 auto offsets = ptr.offsets();
327 mask &= CmpLT(offsets + SIMD::Int(sizeof(float) - 1), SIMD::Int(ptr.limit)); // Disable OOB reads.
Ben Clayton0fc611f2019-04-18 11:23:27 -0400328 if (!atomic && order == std::memory_order_relaxed)
Ben Clayton9e4bc1b2019-04-16 16:52:02 -0400329 {
Ben Clayton0fc611f2019-04-18 11:23:27 -0400330 return rr::Scatter(rr::Pointer<EL>(ptr.base), val, offsets, mask, sizeof(float));
Ben Clayton9e4bc1b2019-04-16 16:52:02 -0400331 }
Ben Clayton0fc611f2019-04-18 11:23:27 -0400332 else
Ben Clayton9e4bc1b2019-04-16 16:52:02 -0400333 {
Ben Clayton0fc611f2019-04-18 11:23:27 -0400334 auto anyLanesDisabled = AnyFalse(mask);
335 If(ptr.hasSequentialOffsets() && !anyLanesDisabled)
Ben Clayton9e4bc1b2019-04-16 16:52:02 -0400336 {
Ben Clayton0fc611f2019-04-18 11:23:27 -0400337 // Store all elements in a single SIMD instruction.
338 auto offset = Extract(offsets, 0);
339 Store(val, rr::Pointer<T>(&ptr.base[offset]), sizeof(float), atomic, order);
340 }
341 Else
342 {
343 // Divergent offsets or masked lanes.
344 for (int i = 0; i < SIMD::Width; i++)
Ben Clayton9e4bc1b2019-04-16 16:52:02 -0400345 {
Ben Clayton0fc611f2019-04-18 11:23:27 -0400346 If(Extract(mask, i) != 0)
347 {
348 auto offset = Extract(offsets, i);
349 rr::Store(Extract(val, i), rr::Pointer<EL>(&ptr.base[offset]), sizeof(float), atomic, order);
350 }
Ben Clayton9e4bc1b2019-04-16 16:52:02 -0400351 }
352 }
353 }
354 }
355
356 } // namespace SIMD
357
Chris Forbesaf4ed532018-12-06 18:33:27 -0800358 volatile int SpirvShader::serialCounter = 1; // Start at 1, 0 is invalid shader.
359
Ben Clayton60f15ec2019-05-09 17:50:01 +0100360 SpirvShader::SpirvShader(
361 VkPipelineShaderStageCreateInfo const *createInfo,
362 InsnStore const &insns,
363 vk::RenderPass *renderPass,
364 uint32_t subpassIndex)
365 : insns{insns}, inputs{MAX_INTERFACE_COMPONENTS},
366 outputs{MAX_INTERFACE_COMPONENTS},
367 serialID{serialCounter++}, modes{}
Chris Forbesaf4ed532018-12-06 18:33:27 -0800368 {
Ben Clayton45faa082019-03-05 13:20:40 +0000369 ASSERT(insns.size() > 0);
370
Chris Forbes24466042019-04-22 10:54:23 -0700371 if (renderPass != VK_NULL_HANDLE)
372 {
373 // capture formats of any input attachments present
374 auto subpass = renderPass->getSubpass(subpassIndex);
375 inputAttachmentFormats.reserve(subpass.inputAttachmentCount);
376 for (auto i = 0u; i < subpass.inputAttachmentCount; i++)
377 {
378 auto attachmentIndex = subpass.pInputAttachments[i].attachment;
379 inputAttachmentFormats.push_back(attachmentIndex != VK_ATTACHMENT_UNUSED
380 ? renderPass->getAttachment(attachmentIndex).format : VK_FORMAT_UNDEFINED);
381 }
382 }
383
Chris Forbesaf4ed532018-12-06 18:33:27 -0800384 // Simplifying assumptions (to be satisfied by earlier transformations)
Chris Forbesaf4ed532018-12-06 18:33:27 -0800385 // - The only input/output OpVariables present are those used by the entrypoint
386
Ben Clayton60f15ec2019-05-09 17:50:01 +0100387 Object::ID entryPointFunctionId;
Ben Clayton9b156612019-03-13 19:48:31 +0000388 Block::ID currentBlock;
389 InsnIterator blockStart;
Chris Forbese57f10e2019-03-04 10:53:07 -0800390
Chris Forbes4a979dc2019-01-17 09:36:46 -0800391 for (auto insn : *this)
392 {
Nicolas Capens125dba02019-04-24 02:03:22 -0400393 spv::Op opcode = insn.opcode();
394
395 switch (opcode)
Chris Forbes4a979dc2019-01-17 09:36:46 -0800396 {
Ben Clayton60f15ec2019-05-09 17:50:01 +0100397 case spv::OpEntryPoint:
398 {
399 auto executionModel = spv::ExecutionModel(insn.word(1));
400 auto id = Object::ID(insn.word(2));
401 auto name = insn.string(3);
402 auto stage = executionModelToStage(executionModel);
403 if (stage == createInfo->stage && strcmp(name, createInfo->pName) == 0)
404 {
405 ASSERT_MSG(entryPointFunctionId == 0, "Duplicate entry point with name '%s' and stage %d", name, int(stage));
406 entryPointFunctionId = id;
407 }
408 break;
409 }
410
Chris Forbes4a979dc2019-01-17 09:36:46 -0800411 case spv::OpExecutionMode:
412 ProcessExecutionMode(insn);
413 break;
Chris Forbesaf4ed532018-12-06 18:33:27 -0800414
Chris Forbesc25b8072018-12-10 15:10:39 -0800415 case spv::OpDecorate:
416 {
Ben Claytonab51bbf2019-02-20 14:36:27 +0000417 TypeOrObjectID targetId = insn.word(1);
Chris Forbes93f70b32019-02-10 21:26:27 +0000418 auto decoration = static_cast<spv::Decoration>(insn.word(2));
Nicolas Capens82eb22e2019-04-10 01:15:43 -0400419 uint32_t value = insn.wordCount() > 3 ? insn.word(3) : 0;
420
421 decorations[targetId].Apply(decoration, value);
422
423 switch(decoration)
424 {
425 case spv::DecorationDescriptorSet:
426 descriptorDecorations[targetId].DescriptorSet = value;
427 break;
428 case spv::DecorationBinding:
429 descriptorDecorations[targetId].Binding = value;
430 break;
Chris Forbes24466042019-04-22 10:54:23 -0700431 case spv::DecorationInputAttachmentIndex:
432 descriptorDecorations[targetId].InputAttachmentIndex = value;
433 break;
Nicolas Capens82eb22e2019-04-10 01:15:43 -0400434 default:
435 // Only handling descriptor decorations here.
436 break;
437 }
Chris Forbes93f70b32019-02-10 21:26:27 +0000438
439 if (decoration == spv::DecorationCentroid)
440 modes.NeedsCentroid = true;
Chris Forbesc25b8072018-12-10 15:10:39 -0800441 break;
442 }
443
444 case spv::OpMemberDecorate:
445 {
Ben Claytonaf973b62019-03-13 18:19:20 +0000446 Type::ID targetId = insn.word(1);
Chris Forbesc25b8072018-12-10 15:10:39 -0800447 auto memberIndex = insn.word(2);
Nicolas Capens82eb22e2019-04-10 01:15:43 -0400448 auto decoration = static_cast<spv::Decoration>(insn.word(3));
449 uint32_t value = insn.wordCount() > 4 ? insn.word(4) : 0;
450
Chris Forbesc25b8072018-12-10 15:10:39 -0800451 auto &d = memberDecorations[targetId];
452 if (memberIndex >= d.size())
453 d.resize(memberIndex + 1); // on demand; exact size would require another pass...
Chris Forbes58228822019-04-17 12:51:29 -0700454
Nicolas Capens82eb22e2019-04-10 01:15:43 -0400455 d[memberIndex].Apply(decoration, value);
Chris Forbes93f70b32019-02-10 21:26:27 +0000456
457 if (decoration == spv::DecorationCentroid)
458 modes.NeedsCentroid = true;
Chris Forbesc25b8072018-12-10 15:10:39 -0800459 break;
460 }
461
462 case spv::OpDecorationGroup:
463 // Nothing to do here. We don't need to record the definition of the group; we'll just have
464 // the bundle of decorations float around. If we were to ever walk the decorations directly,
465 // we might think about introducing this as a real Object.
466 break;
467
468 case spv::OpGroupDecorate:
469 {
Nicolas Capens82eb22e2019-04-10 01:15:43 -0400470 uint32_t group = insn.word(1);
471 auto const &groupDecorations = decorations[group];
472 auto const &descriptorGroupDecorations = descriptorDecorations[group];
Chris Forbesc25b8072018-12-10 15:10:39 -0800473 for (auto i = 2u; i < insn.wordCount(); i++)
474 {
Nicolas Capens82eb22e2019-04-10 01:15:43 -0400475 // Remaining operands are targets to apply the group to.
476 uint32_t target = insn.word(i);
477 decorations[target].Apply(groupDecorations);
478 descriptorDecorations[target].Apply(descriptorGroupDecorations);
Chris Forbesc25b8072018-12-10 15:10:39 -0800479 }
Nicolas Capens82eb22e2019-04-10 01:15:43 -0400480
Chris Forbesc25b8072018-12-10 15:10:39 -0800481 break;
482 }
483
484 case spv::OpGroupMemberDecorate:
485 {
486 auto const &srcDecorations = decorations[insn.word(1)];
487 for (auto i = 2u; i < insn.wordCount(); i += 2)
488 {
489 // remaining operands are pairs of <id>, literal for members to apply to.
490 auto &d = memberDecorations[insn.word(i)];
491 auto memberIndex = insn.word(i + 1);
492 if (memberIndex >= d.size())
493 d.resize(memberIndex + 1); // on demand resize, see above...
494 d[memberIndex].Apply(srcDecorations);
495 }
496 break;
497 }
498
Chris Forbese57f10e2019-03-04 10:53:07 -0800499 case spv::OpLabel:
Ben Clayton9b156612019-03-13 19:48:31 +0000500 {
501 ASSERT(currentBlock.value() == 0);
502 currentBlock = Block::ID(insn.word(1));
503 blockStart = insn;
Chris Forbese57f10e2019-03-04 10:53:07 -0800504 break;
Ben Clayton9b156612019-03-13 19:48:31 +0000505 }
Chris Forbese57f10e2019-03-04 10:53:07 -0800506
Ben Clayton9b156612019-03-13 19:48:31 +0000507 // Branch Instructions (subset of Termination Instructions):
508 case spv::OpBranch:
509 case spv::OpBranchConditional:
510 case spv::OpSwitch:
Chris Forbese57f10e2019-03-04 10:53:07 -0800511 case spv::OpReturn:
Ben Clayton9b156612019-03-13 19:48:31 +0000512 // fallthrough
513
514 // Termination instruction:
515 case spv::OpKill:
516 case spv::OpUnreachable:
517 {
518 ASSERT(currentBlock.value() != 0);
519 auto blockEnd = insn; blockEnd++;
520 blocks[currentBlock] = Block(blockStart, blockEnd);
521 currentBlock = Block::ID(0);
522
Nicolas Capens125dba02019-04-24 02:03:22 -0400523 if (opcode == spv::OpKill)
Ben Clayton9b156612019-03-13 19:48:31 +0000524 {
525 modes.ContainsKill = true;
526 }
Chris Forbese57f10e2019-03-04 10:53:07 -0800527 break;
Ben Clayton9b156612019-03-13 19:48:31 +0000528 }
Chris Forbese57f10e2019-03-04 10:53:07 -0800529
Ben Claytone747b3c2019-03-21 19:35:15 +0000530 case spv::OpLoopMerge:
Ben Clayton9fd02e02019-03-21 18:47:15 +0000531 case spv::OpSelectionMerge:
532 break; // Nothing to do in analysis pass.
533
Chris Forbes4a979dc2019-01-17 09:36:46 -0800534 case spv::OpTypeVoid:
535 case spv::OpTypeBool:
536 case spv::OpTypeInt:
537 case spv::OpTypeFloat:
538 case spv::OpTypeVector:
539 case spv::OpTypeMatrix:
540 case spv::OpTypeImage:
541 case spv::OpTypeSampler:
542 case spv::OpTypeSampledImage:
543 case spv::OpTypeArray:
544 case spv::OpTypeRuntimeArray:
545 case spv::OpTypeStruct:
546 case spv::OpTypePointer:
547 case spv::OpTypeFunction:
Ben Clayton0bb83b82019-02-26 11:41:07 +0000548 DeclareType(insn);
Chris Forbes4a979dc2019-01-17 09:36:46 -0800549 break;
Chris Forbes296aa252018-12-27 11:48:21 -0800550
Chris Forbes4a979dc2019-01-17 09:36:46 -0800551 case spv::OpVariable:
552 {
Ben Claytonaf973b62019-03-13 18:19:20 +0000553 Type::ID typeId = insn.word(1);
554 Object::ID resultId = insn.word(2);
Chris Forbes4a979dc2019-01-17 09:36:46 -0800555 auto storageClass = static_cast<spv::StorageClass>(insn.word(3));
Chris Forbes296aa252018-12-27 11:48:21 -0800556
Chris Forbes4a979dc2019-01-17 09:36:46 -0800557 auto &object = defs[resultId];
Ben Clayton1d514f32019-04-19 16:11:18 -0400558 object.kind = Object::Kind::Pointer;
Chris Forbes4a979dc2019-01-17 09:36:46 -0800559 object.definition = insn;
Ben Clayton9a162482019-02-25 11:54:43 +0000560 object.type = typeId;
Chris Forbesc25b8072018-12-10 15:10:39 -0800561
Ben Claytonecd38482019-04-19 17:11:08 -0400562 ASSERT(getType(typeId).definition.opcode() == spv::OpTypePointer);
Ben Claytonefec1b92019-03-05 17:38:16 +0000563 ASSERT(getType(typeId).storageClass == storageClass);
564
565 switch (storageClass)
Chris Forbesc25b8072018-12-10 15:10:39 -0800566 {
Ben Claytonefec1b92019-03-05 17:38:16 +0000567 case spv::StorageClassInput:
568 case spv::StorageClassOutput:
Ben Claytona1924732019-02-28 18:42:10 +0000569 ProcessInterfaceVariable(object);
Ben Claytonefec1b92019-03-05 17:38:16 +0000570 break;
Ben Clayton484e08e2019-04-05 12:11:39 +0100571
Ben Claytonefec1b92019-03-05 17:38:16 +0000572 case spv::StorageClassUniform:
573 case spv::StorageClassStorageBuffer:
Ben Clayton6b511342019-04-05 12:12:30 +0100574 object.kind = Object::Kind::DescriptorSet;
575 break;
576
Chris Forbesa30de542019-03-18 18:51:55 -0700577 case spv::StorageClassPushConstant:
Ben Claytonefec1b92019-03-05 17:38:16 +0000578 case spv::StorageClassPrivate:
579 case spv::StorageClassFunction:
Ben Claytonefec1b92019-03-05 17:38:16 +0000580 case spv::StorageClassUniformConstant:
Chris Forbesfa82c342019-04-26 16:42:38 -0700581 break; // Correctly handled.
Nicolas Capens7d867272019-04-08 22:51:08 -0400582
Ben Claytonefec1b92019-03-05 17:38:16 +0000583 case spv::StorageClassWorkgroup:
Ben Claytonecd38482019-04-19 17:11:08 -0400584 {
585 auto &elTy = getType(getType(typeId).element);
586 auto sizeInBytes = elTy.sizeInComponents * sizeof(float);
587 workgroupMemory.allocate(resultId, sizeInBytes);
588 object.kind = Object::Kind::Pointer;
589 break;
590 }
Ben Claytonefec1b92019-03-05 17:38:16 +0000591 case spv::StorageClassAtomicCounter:
592 case spv::StorageClassImage:
593 UNIMPLEMENTED("StorageClass %d not yet implemented", (int)storageClass);
594 break;
595
Ben Clayton92797c22019-04-25 10:44:03 +0100596 case spv::StorageClassCrossWorkgroup:
597 UNSUPPORTED("SPIR-V OpenCL Execution Model (StorageClassCrossWorkgroup)");
598 break;
599
600 case spv::StorageClassGeneric:
601 UNSUPPORTED("SPIR-V GenericPointer Capability (StorageClassGeneric)");
602 break;
603
Ben Claytonefec1b92019-03-05 17:38:16 +0000604 default:
Nicolas Capens29090852019-03-19 16:22:35 -0400605 UNREACHABLE("Unexpected StorageClass %d", storageClass); // See Appendix A of the Vulkan spec.
Ben Claytonefec1b92019-03-05 17:38:16 +0000606 break;
Chris Forbesc25b8072018-12-10 15:10:39 -0800607 }
Chris Forbes4a979dc2019-01-17 09:36:46 -0800608 break;
609 }
Chris Forbes296aa252018-12-27 11:48:21 -0800610
Chris Forbesbc3a0ee2018-12-27 16:02:58 -0800611 case spv::OpConstant:
Chris Forbes1ca8acd2019-02-20 13:00:54 -0800612 CreateConstant(insn).constantValue[0] = insn.word(3);
613 break;
Chris Forbesbc3a0ee2018-12-27 16:02:58 -0800614 case spv::OpConstantFalse:
Chris Forbes1ca8acd2019-02-20 13:00:54 -0800615 CreateConstant(insn).constantValue[0] = 0; // represent boolean false as zero
616 break;
Chris Forbesbc3a0ee2018-12-27 16:02:58 -0800617 case spv::OpConstantTrue:
Chris Forbes1ca8acd2019-02-20 13:00:54 -0800618 CreateConstant(insn).constantValue[0] = ~0u; // represent boolean true as all bits set
619 break;
Chris Forbesbc3a0ee2018-12-27 16:02:58 -0800620 case spv::OpConstantNull:
Chris Forbes0e712412019-03-18 19:31:16 -0700621 case spv::OpUndef:
Chris Forbesbc3a0ee2018-12-27 16:02:58 -0800622 {
Chris Forbes0e712412019-03-18 19:31:16 -0700623 // TODO: consider a real LLVM-level undef. For now, zero is a perfectly good value.
Chris Forbes1ca8acd2019-02-20 13:00:54 -0800624 // OpConstantNull forms a constant of arbitrary type, all zeros.
Ben Clayton9a162482019-02-25 11:54:43 +0000625 auto &object = CreateConstant(insn);
626 auto &objectTy = getType(object.type);
627 for (auto i = 0u; i < objectTy.sizeInComponents; i++)
Chris Forbes1ca8acd2019-02-20 13:00:54 -0800628 {
629 object.constantValue[i] = 0;
630 }
631 break;
632 }
633 case spv::OpConstantComposite:
634 {
635 auto &object = CreateConstant(insn);
636 auto offset = 0u;
637 for (auto i = 0u; i < insn.wordCount() - 3; i++)
638 {
Ben Clayton9a162482019-02-25 11:54:43 +0000639 auto &constituent = getObject(insn.word(i + 3));
640 auto &constituentTy = getType(constituent.type);
641 for (auto j = 0u; j < constituentTy.sizeInComponents; j++)
Chris Forbes1ca8acd2019-02-20 13:00:54 -0800642 object.constantValue[offset++] = constituent.constantValue[j];
643 }
Ben Clayton62758f52019-03-13 14:18:58 +0000644
645 auto objectId = Object::ID(insn.word(2));
646 auto decorationsIt = decorations.find(objectId);
647 if (decorationsIt != decorations.end() &&
648 decorationsIt->second.BuiltIn == spv::BuiltInWorkgroupSize)
649 {
650 // https://www.khronos.org/registry/vulkan/specs/1.1/html/vkspec.html#interfaces-builtin-variables :
651 // Decorating an object with the WorkgroupSize built-in
652 // decoration will make that object contain the dimensions
653 // of a local workgroup. If an object is decorated with the
654 // WorkgroupSize decoration, this must take precedence over
655 // any execution mode set for LocalSize.
656 // The object decorated with WorkgroupSize must be declared
657 // as a three-component vector of 32-bit integers.
658 ASSERT(getType(object.type).sizeInComponents == 3);
659 modes.WorkgroupSizeX = object.constantValue[0];
660 modes.WorkgroupSizeY = object.constantValue[1];
661 modes.WorkgroupSizeZ = object.constantValue[2];
662 }
Chris Forbesbc3a0ee2018-12-27 16:02:58 -0800663 break;
664 }
665
Chris Forbesbde34082018-12-28 12:03:10 -0800666 case spv::OpCapability:
Ben Clayton9b156612019-03-13 19:48:31 +0000667 break; // Various capabilities will be declared, but none affect our code generation at this point.
Chris Forbesbde34082018-12-28 12:03:10 -0800668 case spv::OpMemoryModel:
Ben Clayton9b156612019-03-13 19:48:31 +0000669 break; // Memory model does not affect our code generation until we decide to do Vulkan Memory Model support.
670
Chris Forbes7edf5342019-02-10 22:41:21 +0000671 case spv::OpFunction:
Ben Clayton60f15ec2019-05-09 17:50:01 +0100672 {
673 auto functionId = Object::ID(insn.word(2));
674 if (functionId == entryPointFunctionId)
Ben Clayton9b156612019-03-13 19:48:31 +0000675 {
Ben Clayton60f15ec2019-05-09 17:50:01 +0100676 // Scan forward to find the function's label.
677 for (auto it = insn; it != end() && entryPointBlockId == 0; it++)
Ben Clayton9b156612019-03-13 19:48:31 +0000678 {
Ben Clayton60f15ec2019-05-09 17:50:01 +0100679 switch (it.opcode())
680 {
681 case spv::OpFunction:
682 case spv::OpFunctionParameter:
683 break;
684 case spv::OpLabel:
685 entryPointBlockId = Block::ID(it.word(1));
686 break;
687 default:
688 WARN("Unexpected opcode '%s' following OpFunction", OpcodeName(it.opcode()).c_str());
689 }
Ben Clayton9b156612019-03-13 19:48:31 +0000690 }
691 }
Ben Clayton60f15ec2019-05-09 17:50:01 +0100692 else
693 {
694 // All non-entry point functions should be inlined into an
695 // entry point function.
696 // This isn't the target entry point, so must be another
697 // entry point that we are not interested in. Just skip it.
698 for (; insn != end() && insn.opcode() != spv::OpFunctionEnd; insn++) {}
699 }
700
Ben Clayton9b156612019-03-13 19:48:31 +0000701 break;
Ben Clayton60f15ec2019-05-09 17:50:01 +0100702 }
Chris Forbes7edf5342019-02-10 22:41:21 +0000703 case spv::OpFunctionEnd:
704 // Due to preprocessing, the entrypoint and its function provide no value.
705 break;
706 case spv::OpExtInstImport:
Ben Clayton92797c22019-04-25 10:44:03 +0100707 {
Chris Forbes7edf5342019-02-10 22:41:21 +0000708 // We will only support the GLSL 450 extended instruction set, so no point in tracking the ID we assign it.
709 // Valid shaders will not attempt to import any other instruction sets.
Ben Clayton60f15ec2019-05-09 17:50:01 +0100710 auto ext = insn.string(2);
Ben Clayton92797c22019-04-25 10:44:03 +0100711 if (0 != strcmp("GLSL.std.450", ext))
Chris Forbes9667a5b2019-03-07 09:26:48 -0800712 {
Ben Clayton92797c22019-04-25 10:44:03 +0100713 UNSUPPORTED("SPIR-V Extension: %s", ext);
Chris Forbes9667a5b2019-03-07 09:26:48 -0800714 }
715 break;
Ben Clayton92797c22019-04-25 10:44:03 +0100716 }
Chris Forbes1776af72019-02-22 17:39:57 -0800717 case spv::OpName:
718 case spv::OpMemberName:
719 case spv::OpSource:
720 case spv::OpSourceContinued:
721 case spv::OpSourceExtension:
Chris Forbesf3a430d2019-03-08 07:51:39 -0800722 case spv::OpLine:
723 case spv::OpNoLine:
724 case spv::OpModuleProcessed:
725 case spv::OpString:
Chris Forbes1776af72019-02-22 17:39:57 -0800726 // No semantic impact
Chris Forbes7edf5342019-02-10 22:41:21 +0000727 break;
728
729 case spv::OpFunctionParameter:
730 case spv::OpFunctionCall:
731 case spv::OpSpecConstant:
732 case spv::OpSpecConstantComposite:
733 case spv::OpSpecConstantFalse:
734 case spv::OpSpecConstantOp:
735 case spv::OpSpecConstantTrue:
736 // These should have all been removed by preprocessing passes. If we see them here,
737 // our assumptions are wrong and we will probably generate wrong code.
Ben Clayton92797c22019-04-25 10:44:03 +0100738 UNREACHABLE("%s should have already been lowered.", OpcodeName(opcode).c_str());
Chris Forbes7edf5342019-02-10 22:41:21 +0000739 break;
740
Chris Forbes4d503052019-03-01 17:13:57 -0800741 case spv::OpFConvert:
Ben Clayton92797c22019-04-25 10:44:03 +0100742 UNSUPPORTED("SPIR-V Float16 or Float64 Capability (OpFConvert)");
Nicolas Capens7d867272019-04-08 22:51:08 -0400743 break;
744
Chris Forbes4d503052019-03-01 17:13:57 -0800745 case spv::OpSConvert:
Ben Clayton92797c22019-04-25 10:44:03 +0100746 UNSUPPORTED("SPIR-V Int16 or Int64 Capability (OpSConvert)");
747 break;
748
Chris Forbes4d503052019-03-01 17:13:57 -0800749 case spv::OpUConvert:
Ben Clayton92797c22019-04-25 10:44:03 +0100750 UNSUPPORTED("SPIR-V Int16 or Int64 Capability (OpUConvert)");
Chris Forbes4d503052019-03-01 17:13:57 -0800751 break;
752
Chris Forbesa71b8e92019-02-10 22:42:42 +0000753 case spv::OpLoad:
754 case spv::OpAccessChain:
Chris Forbes10fd6242019-03-15 12:27:34 -0700755 case spv::OpInBoundsAccessChain:
Chris Forbesfa82c342019-04-26 16:42:38 -0700756 case spv::OpSampledImage:
757 case spv::OpImage:
Nicolas Capens82eb22e2019-04-10 01:15:43 -0400758 {
759 // Propagate the descriptor decorations to the result.
760 Object::ID resultId = insn.word(2);
761 Object::ID pointerId = insn.word(3);
762 const auto &d = descriptorDecorations.find(pointerId);
763
764 if(d != descriptorDecorations.end())
765 {
766 descriptorDecorations[resultId] = d->second;
767 }
768
769 DefineResult(insn);
Chris Forbese6419ad2019-04-11 12:23:10 -0700770
Nicolas Capens125dba02019-04-24 02:03:22 -0400771 if (opcode == spv::OpAccessChain || opcode == spv::OpInBoundsAccessChain)
Chris Forbese6419ad2019-04-11 12:23:10 -0700772 {
773 Decorations dd{};
Chris Forbes3610ded2019-04-22 18:12:13 -0700774 ApplyDecorationsForAccessChain(&dd, &descriptorDecorations[resultId], pointerId, insn.wordCount() - 4, insn.wordPointer(4));
Chris Forbese6419ad2019-04-11 12:23:10 -0700775 // Note: offset is the one thing that does *not* propagate, as the access chain accounts for it.
776 dd.HasOffset = false;
777 decorations[resultId].Apply(dd);
778 }
Nicolas Capens82eb22e2019-04-10 01:15:43 -0400779 }
780 break;
781
Chris Forbesb97a9572019-02-21 16:51:42 -0800782 case spv::OpCompositeConstruct:
Chris Forbes1bc1acf2019-02-21 18:40:33 -0800783 case spv::OpCompositeInsert:
Chris Forbesb12846d2019-02-21 18:53:58 -0800784 case spv::OpCompositeExtract:
Chris Forbes83fc5442019-02-26 22:16:07 -0800785 case spv::OpVectorShuffle:
Chris Forbesfaed9d32019-03-15 10:31:08 -0700786 case spv::OpVectorTimesScalar:
Chris Forbes57e05b82019-03-28 09:16:20 +1300787 case spv::OpMatrixTimesScalar:
Chris Forbes06f4ed72019-03-28 09:53:20 +1300788 case spv::OpMatrixTimesVector:
Chris Forbesa563dd82019-03-28 10:32:55 +1300789 case spv::OpVectorTimesMatrix:
Chris Forbes51562f12019-03-28 19:08:39 -0700790 case spv::OpMatrixTimesMatrix:
Ben Clayton3ee52992019-04-08 11:01:23 -0400791 case spv::OpOuterProduct:
Ben Clayton620f7082019-04-08 11:12:08 -0400792 case spv::OpTranspose:
Chris Forbesfaed9d32019-03-15 10:31:08 -0700793 case spv::OpVectorExtractDynamic:
794 case spv::OpVectorInsertDynamic:
Nicolas Capens82eb22e2019-04-10 01:15:43 -0400795 // Unary ops
796 case spv::OpNot:
Ben Claytonb5bfa502019-04-08 14:26:36 -0400797 case spv::OpBitFieldInsert:
Ben Claytond86db952019-04-08 13:43:11 -0400798 case spv::OpBitFieldSExtract:
799 case spv::OpBitFieldUExtract:
Ben Claytond2a46432019-04-08 11:41:45 -0400800 case spv::OpBitReverse:
Ben Clayton1eb017d2019-04-08 11:32:09 -0400801 case spv::OpBitCount:
Ben Claytondd1e37e2019-02-28 19:59:15 +0000802 case spv::OpSNegate:
803 case spv::OpFNegate:
804 case spv::OpLogicalNot:
Ben Clayton64da4ae2019-04-19 12:34:06 -0400805 case spv::OpQuantizeToF16:
Nicolas Capens82eb22e2019-04-10 01:15:43 -0400806 // Binary ops
807 case spv::OpIAdd:
Ben Claytondd1e37e2019-02-28 19:59:15 +0000808 case spv::OpISub:
809 case spv::OpIMul:
810 case spv::OpSDiv:
811 case spv::OpUDiv:
812 case spv::OpFAdd:
813 case spv::OpFSub:
Chris Forbes9d931532019-03-08 09:53:03 -0800814 case spv::OpFMul:
Ben Claytondd1e37e2019-02-28 19:59:15 +0000815 case spv::OpFDiv:
Chris Forbes0e4d6ff2019-03-15 13:43:36 -0700816 case spv::OpFMod:
Chris Forbes1a4c7122019-03-15 14:50:47 -0700817 case spv::OpFRem:
Ben Claytonec1aeb82019-03-04 19:33:27 +0000818 case spv::OpFOrdEqual:
819 case spv::OpFUnordEqual:
820 case spv::OpFOrdNotEqual:
821 case spv::OpFUnordNotEqual:
822 case spv::OpFOrdLessThan:
823 case spv::OpFUnordLessThan:
824 case spv::OpFOrdGreaterThan:
825 case spv::OpFUnordGreaterThan:
826 case spv::OpFOrdLessThanEqual:
827 case spv::OpFUnordLessThanEqual:
828 case spv::OpFOrdGreaterThanEqual:
829 case spv::OpFUnordGreaterThanEqual:
Ben Claytonbb8c8e22019-03-08 12:04:00 +0000830 case spv::OpSMod:
Chris Forbes71673c82019-03-14 12:55:20 -0700831 case spv::OpSRem:
Ben Claytondd1e37e2019-02-28 19:59:15 +0000832 case spv::OpUMod:
Ben Claytone95eeb12019-03-04 16:32:09 +0000833 case spv::OpIEqual:
834 case spv::OpINotEqual:
835 case spv::OpUGreaterThan:
836 case spv::OpSGreaterThan:
837 case spv::OpUGreaterThanEqual:
838 case spv::OpSGreaterThanEqual:
839 case spv::OpULessThan:
840 case spv::OpSLessThan:
841 case spv::OpULessThanEqual:
842 case spv::OpSLessThanEqual:
Ben Claytondd1e37e2019-02-28 19:59:15 +0000843 case spv::OpShiftRightLogical:
844 case spv::OpShiftRightArithmetic:
845 case spv::OpShiftLeftLogical:
846 case spv::OpBitwiseOr:
847 case spv::OpBitwiseXor:
848 case spv::OpBitwiseAnd:
849 case spv::OpLogicalOr:
850 case spv::OpLogicalAnd:
Chris Forbes787b4462019-03-08 12:16:57 -0800851 case spv::OpLogicalEqual:
852 case spv::OpLogicalNotEqual:
Chris Forbese86b6dc2019-03-01 09:08:47 -0800853 case spv::OpUMulExtended:
854 case spv::OpSMulExtended:
Chris Forbes3e6f60b2019-05-08 17:28:10 -0700855 case spv::OpIAddCarry:
856 case spv::OpISubBorrow:
Chris Forbes2b287cc2019-03-01 13:24:17 -0800857 case spv::OpDot:
Chris Forbes4d503052019-03-01 17:13:57 -0800858 case spv::OpConvertFToU:
859 case spv::OpConvertFToS:
860 case spv::OpConvertSToF:
861 case spv::OpConvertUToF:
862 case spv::OpBitcast:
Ben Claytonbf943f62019-03-05 12:57:39 +0000863 case spv::OpSelect:
Chris Forbes9667a5b2019-03-07 09:26:48 -0800864 case spv::OpExtInst:
Chris Forbes3ed33ce2019-03-07 13:38:31 -0800865 case spv::OpIsInf:
866 case spv::OpIsNan:
Chris Forbes0785f692019-03-08 09:09:18 -0800867 case spv::OpAny:
868 case spv::OpAll:
Chris Forbesaff2dd02019-03-20 14:50:24 -0700869 case spv::OpDPdx:
870 case spv::OpDPdxCoarse:
871 case spv::OpDPdy:
872 case spv::OpDPdyCoarse:
873 case spv::OpFwidth:
874 case spv::OpFwidthCoarse:
875 case spv::OpDPdxFine:
876 case spv::OpDPdyFine:
877 case spv::OpFwidthFine:
Nicolas Capens5e8414e2019-03-19 16:22:35 -0400878 case spv::OpAtomicLoad:
Chris Forbes17813932019-04-18 11:45:54 -0700879 case spv::OpAtomicIAdd:
Chris Forbes707ed992019-04-18 18:17:35 -0700880 case spv::OpAtomicISub:
Chris Forbes17813932019-04-18 11:45:54 -0700881 case spv::OpAtomicSMin:
882 case spv::OpAtomicSMax:
883 case spv::OpAtomicUMin:
884 case spv::OpAtomicUMax:
885 case spv::OpAtomicAnd:
886 case spv::OpAtomicOr:
887 case spv::OpAtomicXor:
Chris Forbes707ed992019-04-18 18:17:35 -0700888 case spv::OpAtomicIIncrement:
889 case spv::OpAtomicIDecrement:
Chris Forbes17813932019-04-18 11:45:54 -0700890 case spv::OpAtomicExchange:
Chris Forbesa16238d2019-04-18 16:31:54 -0700891 case spv::OpAtomicCompareExchange:
Ben Clayton9fd02e02019-03-21 18:47:15 +0000892 case spv::OpPhi:
Nicolas Capens7d867272019-04-08 22:51:08 -0400893 case spv::OpImageSampleImplicitLod:
Nicolas Capens125dba02019-04-24 02:03:22 -0400894 case spv::OpImageSampleExplicitLod:
Nicolas Capens5b09dd12019-04-30 01:05:28 -0400895 case spv::OpImageSampleDrefImplicitLod:
896 case spv::OpImageSampleDrefExplicitLod:
897 case spv::OpImageSampleProjImplicitLod:
898 case spv::OpImageSampleProjExplicitLod:
899 case spv::OpImageSampleProjDrefImplicitLod:
900 case spv::OpImageSampleProjDrefExplicitLod:
Chris Forbescd631592019-04-27 10:37:18 -0700901 case spv::OpImageFetch:
Chris Forbesb0d00ea2019-04-17 20:24:20 -0700902 case spv::OpImageQuerySize:
Ben Clayton0264d8e2019-05-08 15:39:40 +0100903 case spv::OpImageQuerySizeLod:
Chris Forbes2f7f2ec2019-04-17 16:58:15 -0700904 case spv::OpImageRead:
Chris Forbesb51f2c12019-04-18 11:01:30 -0700905 case spv::OpImageTexelPointer:
Ben Clayton32d47972019-04-19 17:08:15 -0400906 case spv::OpGroupNonUniformElect:
Ben Clayton78abf372019-05-09 15:11:58 +0100907 case spv::OpCopyObject:
Ben Claytone4605da2019-05-09 16:24:01 +0100908 case spv::OpArrayLength:
Nicolas Capens7d867272019-04-08 22:51:08 -0400909 // Instructions that yield an intermediate value or divergent pointer
910 DefineResult(insn);
Chris Forbesa71b8e92019-02-10 22:42:42 +0000911 break;
Chris Forbesa71b8e92019-02-10 22:42:42 +0000912
Chris Forbes7edf5342019-02-10 22:41:21 +0000913 case spv::OpStore:
Nicolas Capens5e8414e2019-03-19 16:22:35 -0400914 case spv::OpAtomicStore:
Chris Forbes179f0142019-04-17 20:24:44 -0700915 case spv::OpImageWrite:
Ben Claytonb5a45462019-04-30 19:21:29 +0100916 case spv::OpCopyMemory:
Ben Claytonb16c5862019-05-08 14:01:38 +0100917 case spv::OpMemoryBarrier:
Chris Forbes7edf5342019-02-10 22:41:21 +0000918 // Don't need to do anything during analysis pass
919 break;
920
Ben Claytonecfeede2019-05-08 08:51:01 +0100921 case spv::OpControlBarrier:
922 modes.ContainsControlBarriers = true;
923 break;
924
Chris Forbes9869d602019-04-18 17:26:16 -0700925 case spv::OpExtension:
926 {
Ben Clayton60f15ec2019-05-09 17:50:01 +0100927 auto ext = insn.string(1);
Chris Forbes9869d602019-04-18 17:26:16 -0700928 // Part of core SPIR-V 1.3. Vulkan 1.1 implementations must also accept the pre-1.3
929 // extension per Appendix A, `Vulkan Environment for SPIR-V`.
Ben Clayton92797c22019-04-25 10:44:03 +0100930 if (!strcmp(ext, "SPV_KHR_storage_buffer_storage_class")) break;
Chris Forbescb9bf9a2019-05-10 08:13:34 -0700931 if (!strcmp(ext, "SPV_KHR_shader_draw_parameters")) break;
932 if (!strcmp(ext, "SPV_KHR_16bit_storage")) break;
Chris Forbes39b07502019-05-09 09:47:06 -0700933 if (!strcmp(ext, "SPV_KHR_variable_pointers")) break;
Ben Clayton92797c22019-04-25 10:44:03 +0100934 UNSUPPORTED("SPIR-V Extension: %s", ext);
Chris Forbes9869d602019-04-18 17:26:16 -0700935 break;
936 }
937
Chris Forbes4a979dc2019-01-17 09:36:46 -0800938 default:
Nicolas Capens125dba02019-04-24 02:03:22 -0400939 UNIMPLEMENTED("%s", OpcodeName(opcode).c_str());
Chris Forbesaf4ed532018-12-06 18:33:27 -0800940 }
941 }
Ben Clayton64f78f52019-03-21 17:21:06 +0000942
Ben Clayton60f15ec2019-05-09 17:50:01 +0100943 ASSERT_MSG(entryPointFunctionId != 0, "Entry point '%s' not found", createInfo->pName);
Ben Claytonfe3f0132019-03-26 11:10:16 +0000944 AssignBlockIns();
945 }
946
Ben Clayton513ed1d2019-03-28 16:07:00 +0000947 void SpirvShader::TraverseReachableBlocks(Block::ID id, SpirvShader::Block::Set& reachable)
Ben Claytonfe3f0132019-03-26 11:10:16 +0000948 {
Ben Clayton513ed1d2019-03-28 16:07:00 +0000949 if (reachable.count(id) == 0)
Ben Claytonfe3f0132019-03-26 11:10:16 +0000950 {
Ben Clayton513ed1d2019-03-28 16:07:00 +0000951 reachable.emplace(id);
952 for (auto out : getBlock(id).outs)
Ben Claytonfe3f0132019-03-26 11:10:16 +0000953 {
Ben Clayton513ed1d2019-03-28 16:07:00 +0000954 TraverseReachableBlocks(out, reachable);
Ben Claytonfe3f0132019-03-26 11:10:16 +0000955 }
956 }
957 }
958
959 void SpirvShader::AssignBlockIns()
960 {
Ben Clayton513ed1d2019-03-28 16:07:00 +0000961 Block::Set reachable;
Ben Clayton60f15ec2019-05-09 17:50:01 +0100962 TraverseReachableBlocks(entryPointBlockId, reachable);
Ben Clayton513ed1d2019-03-28 16:07:00 +0000963
Ben Clayton64f78f52019-03-21 17:21:06 +0000964 for (auto &it : blocks)
965 {
966 auto &blockId = it.first;
Ben Clayton513ed1d2019-03-28 16:07:00 +0000967 if (reachable.count(blockId) > 0)
Ben Clayton64f78f52019-03-21 17:21:06 +0000968 {
Ben Clayton513ed1d2019-03-28 16:07:00 +0000969 for (auto &outId : it.second.outs)
970 {
971 auto outIt = blocks.find(outId);
972 ASSERT_MSG(outIt != blocks.end(), "Block %d has a non-existent out %d", blockId.value(), outId.value());
973 auto &out = outIt->second;
974 out.ins.emplace(blockId);
975 }
Ben Clayton64f78f52019-03-21 17:21:06 +0000976 }
977 }
Chris Forbesaf4ed532018-12-06 18:33:27 -0800978 }
979
Ben Clayton0bb83b82019-02-26 11:41:07 +0000980 void SpirvShader::DeclareType(InsnIterator insn)
981 {
Ben Claytonaf973b62019-03-13 18:19:20 +0000982 Type::ID resultId = insn.word(1);
Ben Clayton0bb83b82019-02-26 11:41:07 +0000983
984 auto &type = types[resultId];
985 type.definition = insn;
986 type.sizeInComponents = ComputeTypeSize(insn);
987
988 // A structure is a builtin block if it has a builtin
989 // member. All members of such a structure are builtins.
990 switch (insn.opcode())
991 {
992 case spv::OpTypeStruct:
993 {
994 auto d = memberDecorations.find(resultId);
995 if (d != memberDecorations.end())
996 {
997 for (auto &m : d->second)
998 {
999 if (m.HasBuiltIn)
1000 {
1001 type.isBuiltInBlock = true;
1002 break;
1003 }
1004 }
1005 }
1006 break;
1007 }
1008 case spv::OpTypePointer:
1009 {
Ben Claytonaf973b62019-03-13 18:19:20 +00001010 Type::ID elementTypeId = insn.word(3);
Ben Clayton0bb83b82019-02-26 11:41:07 +00001011 type.element = elementTypeId;
1012 type.isBuiltInBlock = getType(elementTypeId).isBuiltInBlock;
1013 type.storageClass = static_cast<spv::StorageClass>(insn.word(2));
1014 break;
1015 }
1016 case spv::OpTypeVector:
1017 case spv::OpTypeMatrix:
1018 case spv::OpTypeArray:
1019 case spv::OpTypeRuntimeArray:
1020 {
Ben Claytonaf973b62019-03-13 18:19:20 +00001021 Type::ID elementTypeId = insn.word(2);
Ben Clayton0bb83b82019-02-26 11:41:07 +00001022 type.element = elementTypeId;
1023 break;
1024 }
1025 default:
1026 break;
1027 }
1028 }
1029
Chris Forbes1ca8acd2019-02-20 13:00:54 -08001030 SpirvShader::Object& SpirvShader::CreateConstant(InsnIterator insn)
1031 {
Ben Claytonaf973b62019-03-13 18:19:20 +00001032 Type::ID typeId = insn.word(1);
1033 Object::ID resultId = insn.word(2);
Chris Forbes1ca8acd2019-02-20 13:00:54 -08001034 auto &object = defs[resultId];
Ben Clayton9a162482019-02-25 11:54:43 +00001035 auto &objectTy = getType(typeId);
1036 object.type = typeId;
Chris Forbes1ca8acd2019-02-20 13:00:54 -08001037 object.kind = Object::Kind::Constant;
1038 object.definition = insn;
Ben Clayton9a162482019-02-25 11:54:43 +00001039 object.constantValue = std::unique_ptr<uint32_t[]>(new uint32_t[objectTy.sizeInComponents]);
Chris Forbes1ca8acd2019-02-20 13:00:54 -08001040 return object;
1041 }
1042
Chris Forbes049ff382019-02-02 15:16:43 -08001043 void SpirvShader::ProcessInterfaceVariable(Object &object)
Chris Forbesbde34082018-12-28 12:03:10 -08001044 {
Ben Clayton9a162482019-02-25 11:54:43 +00001045 auto &objectTy = getType(object.type);
Ben Clayton6fae32c2019-02-28 20:06:42 +00001046 ASSERT(objectTy.storageClass == spv::StorageClassInput || objectTy.storageClass == spv::StorageClassOutput);
Chris Forbesbde34082018-12-28 12:03:10 -08001047
Nicolas Capens29090852019-03-19 16:22:35 -04001048 ASSERT(objectTy.opcode() == spv::OpTypePointer);
Ben Clayton9a162482019-02-25 11:54:43 +00001049 auto pointeeTy = getType(objectTy.element);
Chris Forbesbde34082018-12-28 12:03:10 -08001050
Ben Clayton9a162482019-02-25 11:54:43 +00001051 auto &builtinInterface = (objectTy.storageClass == spv::StorageClassInput) ? inputBuiltins : outputBuiltins;
1052 auto &userDefinedInterface = (objectTy.storageClass == spv::StorageClassInput) ? inputs : outputs;
1053
Nicolas Capens29090852019-03-19 16:22:35 -04001054 ASSERT(object.opcode() == spv::OpVariable);
Ben Claytonaf973b62019-03-13 18:19:20 +00001055 Object::ID resultId = object.definition.word(2);
Ben Clayton9a162482019-02-25 11:54:43 +00001056
1057 if (objectTy.isBuiltInBlock)
Chris Forbesbde34082018-12-28 12:03:10 -08001058 {
1059 // walk the builtin block, registering each of its members separately.
Ben Clayton9a162482019-02-25 11:54:43 +00001060 auto m = memberDecorations.find(objectTy.element);
Ben Clayton6fae32c2019-02-28 20:06:42 +00001061 ASSERT(m != memberDecorations.end()); // otherwise we wouldn't have marked the type chain
Ben Clayton9a162482019-02-25 11:54:43 +00001062 auto &structType = pointeeTy.definition;
Chris Forbesbde34082018-12-28 12:03:10 -08001063 auto offset = 0u;
1064 auto word = 2u;
1065 for (auto &member : m->second)
1066 {
Chris Forbes840809a2019-01-14 14:30:20 -08001067 auto &memberType = getType(structType.word(word));
Chris Forbesbde34082018-12-28 12:03:10 -08001068
1069 if (member.HasBuiltIn)
1070 {
1071 builtinInterface[member.BuiltIn] = {resultId, offset, memberType.sizeInComponents};
1072 }
1073
1074 offset += memberType.sizeInComponents;
1075 ++word;
1076 }
1077 return;
1078 }
1079
1080 auto d = decorations.find(resultId);
1081 if (d != decorations.end() && d->second.HasBuiltIn)
1082 {
Ben Clayton9a162482019-02-25 11:54:43 +00001083 builtinInterface[d->second.BuiltIn] = {resultId, 0, pointeeTy.sizeInComponents};
Chris Forbesbde34082018-12-28 12:03:10 -08001084 }
1085 else
1086 {
Chris Forbes049ff382019-02-02 15:16:43 -08001087 object.kind = Object::Kind::InterfaceVariable;
Chris Forbesb8fb08a2019-02-13 11:45:27 -08001088 VisitInterface(resultId,
1089 [&userDefinedInterface](Decorations const &d, AttribType type) {
1090 // Populate a single scalar slot in the interface from a collection of decorations and the intended component type.
1091 auto scalarSlot = (d.Location << 2) | d.Component;
Ben Clayton6fae32c2019-02-28 20:06:42 +00001092 ASSERT(scalarSlot >= 0 &&
Chris Forbesb8fb08a2019-02-13 11:45:27 -08001093 scalarSlot < static_cast<int32_t>(userDefinedInterface.size()));
1094
1095 auto &slot = userDefinedInterface[scalarSlot];
1096 slot.Type = type;
1097 slot.Flat = d.Flat;
1098 slot.NoPerspective = d.NoPerspective;
1099 slot.Centroid = d.Centroid;
1100 });
Chris Forbesbde34082018-12-28 12:03:10 -08001101 }
1102 }
1103
Chris Forbesaf4ed532018-12-06 18:33:27 -08001104 void SpirvShader::ProcessExecutionMode(InsnIterator insn)
1105 {
1106 auto mode = static_cast<spv::ExecutionMode>(insn.word(2));
Chris Forbes4a979dc2019-01-17 09:36:46 -08001107 switch (mode)
1108 {
1109 case spv::ExecutionModeEarlyFragmentTests:
1110 modes.EarlyFragmentTests = true;
1111 break;
1112 case spv::ExecutionModeDepthReplacing:
1113 modes.DepthReplacing = true;
1114 break;
1115 case spv::ExecutionModeDepthGreater:
1116 modes.DepthGreater = true;
1117 break;
1118 case spv::ExecutionModeDepthLess:
1119 modes.DepthLess = true;
1120 break;
1121 case spv::ExecutionModeDepthUnchanged:
1122 modes.DepthUnchanged = true;
1123 break;
1124 case spv::ExecutionModeLocalSize:
Ben Clayton62758f52019-03-13 14:18:58 +00001125 modes.WorkgroupSizeX = insn.word(3);
1126 modes.WorkgroupSizeY = insn.word(4);
1127 modes.WorkgroupSizeZ = insn.word(5);
Chris Forbes4a979dc2019-01-17 09:36:46 -08001128 break;
1129 case spv::ExecutionModeOriginUpperLeft:
1130 // This is always the case for a Vulkan shader. Do nothing.
1131 break;
1132 default:
Ben Clayton92797c22019-04-25 10:44:03 +01001133 UNREACHABLE("Execution mode: %d", int(mode));
Chris Forbesaf4ed532018-12-06 18:33:27 -08001134 }
1135 }
Chris Forbes739a7fb2018-12-08 13:09:40 -08001136
Ben Clayton9b156612019-03-13 19:48:31 +00001137 uint32_t SpirvShader::ComputeTypeSize(InsnIterator insn)
Chris Forbes739a7fb2018-12-08 13:09:40 -08001138 {
1139 // Types are always built from the bottom up (with the exception of forward ptrs, which
1140 // don't appear in Vulkan shaders. Therefore, we can always assume our component parts have
1141 // already been described (and so their sizes determined)
1142 switch (insn.opcode())
1143 {
1144 case spv::OpTypeVoid:
1145 case spv::OpTypeSampler:
1146 case spv::OpTypeImage:
1147 case spv::OpTypeSampledImage:
1148 case spv::OpTypeFunction:
1149 case spv::OpTypeRuntimeArray:
1150 // Objects that don't consume any space.
1151 // Descriptor-backed objects currently only need exist at compile-time.
1152 // Runtime arrays don't appear in places where their size would be interesting
1153 return 0;
1154
1155 case spv::OpTypeBool:
1156 case spv::OpTypeFloat:
1157 case spv::OpTypeInt:
1158 // All the fundamental types are 1 component. If we ever add support for 8/16/64-bit components,
1159 // we might need to change this, but only 32 bit components are required for Vulkan 1.1.
1160 return 1;
1161
1162 case spv::OpTypeVector:
1163 case spv::OpTypeMatrix:
1164 // Vectors and matrices both consume element count * element size.
Chris Forbes840809a2019-01-14 14:30:20 -08001165 return getType(insn.word(2)).sizeInComponents * insn.word(3);
Chris Forbes739a7fb2018-12-08 13:09:40 -08001166
1167 case spv::OpTypeArray:
Chris Forbes5be4d702018-12-27 16:12:31 -08001168 {
1169 // Element count * element size. Array sizes come from constant ids.
1170 auto arraySize = GetConstantInt(insn.word(3));
Chris Forbes840809a2019-01-14 14:30:20 -08001171 return getType(insn.word(2)).sizeInComponents * arraySize;
Chris Forbes5be4d702018-12-27 16:12:31 -08001172 }
Chris Forbes739a7fb2018-12-08 13:09:40 -08001173
1174 case spv::OpTypeStruct:
1175 {
1176 uint32_t size = 0;
1177 for (uint32_t i = 2u; i < insn.wordCount(); i++)
1178 {
Chris Forbes840809a2019-01-14 14:30:20 -08001179 size += getType(insn.word(i)).sizeInComponents;
Chris Forbes739a7fb2018-12-08 13:09:40 -08001180 }
1181 return size;
1182 }
1183
1184 case spv::OpTypePointer:
Chris Forbes0f59a2c2019-02-10 23:03:12 +00001185 // Runtime representation of a pointer is a per-lane index.
1186 // Note: clients are expected to look through the pointer if they want the pointee size instead.
1187 return 1;
Chris Forbes739a7fb2018-12-08 13:09:40 -08001188
1189 default:
Ben Clayton92797c22019-04-25 10:44:03 +01001190 UNREACHABLE("%s", OpcodeName(insn.opcode()).c_str());
Ben Clayton60a3d6f2019-02-26 17:24:46 +00001191 return 0;
Chris Forbes739a7fb2018-12-08 13:09:40 -08001192 }
1193 }
Chris Forbesc25b8072018-12-10 15:10:39 -08001194
Ben Clayton831db962019-02-27 14:57:18 +00001195 bool SpirvShader::IsStorageInterleavedByLane(spv::StorageClass storageClass)
1196 {
1197 switch (storageClass)
1198 {
1199 case spv::StorageClassUniform:
1200 case spv::StorageClassStorageBuffer:
Chris Forbesa30de542019-03-18 18:51:55 -07001201 case spv::StorageClassPushConstant:
Ben Claytonecd38482019-04-19 17:11:08 -04001202 case spv::StorageClassWorkgroup:
Ben Clayton831db962019-02-27 14:57:18 +00001203 return false;
1204 default:
1205 return true;
1206 }
1207 }
1208
Chris Forbesb8fb08a2019-02-13 11:45:27 -08001209 template<typename F>
Ben Claytonaf973b62019-03-13 18:19:20 +00001210 int SpirvShader::VisitInterfaceInner(Type::ID id, Decorations d, F f) const
Chris Forbes5839dcf2018-12-10 19:02:58 -08001211 {
1212 // Recursively walks variable definition and its type tree, taking into account
1213 // any explicit Location or Component decorations encountered; where explicit
1214 // Locations or Components are not specified, assigns them sequentially.
1215 // Collected decorations are carried down toward the leaves and across
1216 // siblings; Effect of decorations intentionally does not flow back up the tree.
1217 //
Chris Forbesb8fb08a2019-02-13 11:45:27 -08001218 // F is a functor to be called with the effective decoration set for every component.
1219 //
1220 // Returns the next available location, and calls f().
Chris Forbes5839dcf2018-12-10 19:02:58 -08001221
1222 // This covers the rules in Vulkan 1.1 spec, 14.1.4 Location Assignment.
1223
Chris Forbes49d664d2019-02-12 19:24:50 +00001224 ApplyDecorationsForId(&d, id);
Chris Forbes5839dcf2018-12-10 19:02:58 -08001225
Chris Forbes840809a2019-01-14 14:30:20 -08001226 auto const &obj = getType(id);
Nicolas Capens29090852019-03-19 16:22:35 -04001227 switch(obj.opcode())
Chris Forbes5839dcf2018-12-10 19:02:58 -08001228 {
Chris Forbes5839dcf2018-12-10 19:02:58 -08001229 case spv::OpTypePointer:
Chris Forbesb8fb08a2019-02-13 11:45:27 -08001230 return VisitInterfaceInner<F>(obj.definition.word(3), d, f);
Chris Forbes5839dcf2018-12-10 19:02:58 -08001231 case spv::OpTypeMatrix:
1232 for (auto i = 0u; i < obj.definition.word(3); i++, d.Location++)
1233 {
1234 // consumes same components of N consecutive locations
Chris Forbesb8fb08a2019-02-13 11:45:27 -08001235 VisitInterfaceInner<F>(obj.definition.word(2), d, f);
Chris Forbes5839dcf2018-12-10 19:02:58 -08001236 }
1237 return d.Location;
1238 case spv::OpTypeVector:
1239 for (auto i = 0u; i < obj.definition.word(3); i++, d.Component++)
1240 {
1241 // consumes N consecutive components in the same location
Chris Forbesb8fb08a2019-02-13 11:45:27 -08001242 VisitInterfaceInner<F>(obj.definition.word(2), d, f);
Chris Forbes5839dcf2018-12-10 19:02:58 -08001243 }
1244 return d.Location + 1;
1245 case spv::OpTypeFloat:
Chris Forbesb8fb08a2019-02-13 11:45:27 -08001246 f(d, ATTRIBTYPE_FLOAT);
Chris Forbes5839dcf2018-12-10 19:02:58 -08001247 return d.Location + 1;
1248 case spv::OpTypeInt:
Chris Forbesb8fb08a2019-02-13 11:45:27 -08001249 f(d, obj.definition.word(3) ? ATTRIBTYPE_INT : ATTRIBTYPE_UINT);
Chris Forbes5839dcf2018-12-10 19:02:58 -08001250 return d.Location + 1;
1251 case spv::OpTypeBool:
Chris Forbesb8fb08a2019-02-13 11:45:27 -08001252 f(d, ATTRIBTYPE_UINT);
Chris Forbes5839dcf2018-12-10 19:02:58 -08001253 return d.Location + 1;
1254 case spv::OpTypeStruct:
1255 {
Chris Forbes5839dcf2018-12-10 19:02:58 -08001256 // iterate over members, which may themselves have Location/Component decorations
1257 for (auto i = 0u; i < obj.definition.wordCount() - 2; i++)
1258 {
Chris Forbes49d664d2019-02-12 19:24:50 +00001259 ApplyDecorationsForIdMember(&d, id, i);
Chris Forbesb8fb08a2019-02-13 11:45:27 -08001260 d.Location = VisitInterfaceInner<F>(obj.definition.word(i + 2), d, f);
Chris Forbes5839dcf2018-12-10 19:02:58 -08001261 d.Component = 0; // Implicit locations always have component=0
1262 }
1263 return d.Location;
1264 }
Chris Forbes5be4d702018-12-27 16:12:31 -08001265 case spv::OpTypeArray:
1266 {
1267 auto arraySize = GetConstantInt(obj.definition.word(3));
1268 for (auto i = 0u; i < arraySize; i++)
1269 {
Chris Forbesb8fb08a2019-02-13 11:45:27 -08001270 d.Location = VisitInterfaceInner<F>(obj.definition.word(2), d, f);
Chris Forbes5be4d702018-12-27 16:12:31 -08001271 }
1272 return d.Location;
1273 }
Chris Forbes5839dcf2018-12-10 19:02:58 -08001274 default:
1275 // Intentionally partial; most opcodes do not participate in type hierarchies
1276 return 0;
1277 }
1278 }
1279
Chris Forbesb8fb08a2019-02-13 11:45:27 -08001280 template<typename F>
Ben Claytonaf973b62019-03-13 18:19:20 +00001281 void SpirvShader::VisitInterface(Object::ID id, F f) const
Chris Forbes5839dcf2018-12-10 19:02:58 -08001282 {
Chris Forbesb8fb08a2019-02-13 11:45:27 -08001283 // Walk a variable definition and call f for each component in it.
Chris Forbes5839dcf2018-12-10 19:02:58 -08001284 Decorations d{};
Chris Forbes49d664d2019-02-12 19:24:50 +00001285 ApplyDecorationsForId(&d, id);
Chris Forbes1c658232019-02-01 17:12:25 -08001286
1287 auto def = getObject(id).definition;
Ben Clayton6fae32c2019-02-28 20:06:42 +00001288 ASSERT(def.opcode() == spv::OpVariable);
Chris Forbesb8fb08a2019-02-13 11:45:27 -08001289 VisitInterfaceInner<F>(def.word(1), d, f);
Chris Forbes5839dcf2018-12-10 19:02:58 -08001290 }
1291
Chris Forbese6419ad2019-04-11 12:23:10 -07001292 template<typename F>
1293 void SpirvShader::VisitMemoryObjectInner(sw::SpirvShader::Type::ID id, sw::SpirvShader::Decorations d, uint32_t& index, uint32_t offset, F f) const
1294 {
1295 // Walk a type tree in an explicitly laid out storage class, calling
1296 // a functor for each scalar element within the object.
1297
1298 // The functor's first parameter is the index of the scalar element;
Ben Clayton97035bd2019-04-16 11:35:38 -04001299 // the second parameter is the offset (in bytes) from the base of the
1300 // object.
Chris Forbese6419ad2019-04-11 12:23:10 -07001301
1302 ApplyDecorationsForId(&d, id);
1303 auto const &type = getType(id);
1304
1305 if (d.HasOffset)
1306 {
Ben Clayton97035bd2019-04-16 11:35:38 -04001307 offset += d.Offset;
Chris Forbese6419ad2019-04-11 12:23:10 -07001308 d.HasOffset = false;
1309 }
1310
1311 switch (type.opcode())
1312 {
1313 case spv::OpTypePointer:
1314 VisitMemoryObjectInner<F>(type.definition.word(3), d, index, offset, f);
1315 break;
1316 case spv::OpTypeInt:
1317 case spv::OpTypeFloat:
1318 f(index++, offset);
1319 break;
1320 case spv::OpTypeVector:
Chris Forbes98e6b962019-04-12 11:58:58 -07001321 {
Ben Clayton97035bd2019-04-16 11:35:38 -04001322 auto elemStride = (d.InsideMatrix && d.HasRowMajor && d.RowMajor) ? d.MatrixStride : sizeof(float);
Chris Forbese6419ad2019-04-11 12:23:10 -07001323 for (auto i = 0u; i < type.definition.word(3); i++)
1324 {
Chris Forbes98e6b962019-04-12 11:58:58 -07001325 VisitMemoryObjectInner(type.definition.word(2), d, index, offset + elemStride * i, f);
Chris Forbese6419ad2019-04-11 12:23:10 -07001326 }
1327 break;
Chris Forbes98e6b962019-04-12 11:58:58 -07001328 }
Chris Forbese6419ad2019-04-11 12:23:10 -07001329 case spv::OpTypeMatrix:
Chris Forbes98e6b962019-04-12 11:58:58 -07001330 {
Ben Clayton97035bd2019-04-16 11:35:38 -04001331 auto columnStride = (d.HasRowMajor && d.RowMajor) ? sizeof(float) : d.MatrixStride;
Chris Forbes98e6b962019-04-12 11:58:58 -07001332 d.InsideMatrix = true;
Chris Forbese6419ad2019-04-11 12:23:10 -07001333 for (auto i = 0u; i < type.definition.word(3); i++)
1334 {
1335 ASSERT(d.HasMatrixStride);
Chris Forbes98e6b962019-04-12 11:58:58 -07001336 VisitMemoryObjectInner(type.definition.word(2), d, index, offset + columnStride * i, f);
Chris Forbese6419ad2019-04-11 12:23:10 -07001337 }
1338 break;
Chris Forbes98e6b962019-04-12 11:58:58 -07001339 }
Chris Forbese6419ad2019-04-11 12:23:10 -07001340 case spv::OpTypeStruct:
1341 for (auto i = 0u; i < type.definition.wordCount() - 2; i++)
1342 {
1343 ApplyDecorationsForIdMember(&d, id, i);
1344 VisitMemoryObjectInner<F>(type.definition.word(i + 2), d, index, offset, f);
1345 }
1346 break;
1347 case spv::OpTypeArray:
1348 {
1349 auto arraySize = GetConstantInt(type.definition.word(3));
1350 for (auto i = 0u; i < arraySize; i++)
1351 {
1352 ASSERT(d.HasArrayStride);
Ben Clayton97035bd2019-04-16 11:35:38 -04001353 VisitMemoryObjectInner<F>(type.definition.word(2), d, index, offset + i * d.ArrayStride, f);
Chris Forbese6419ad2019-04-11 12:23:10 -07001354 }
1355 break;
1356 }
1357 default:
Ben Clayton92797c22019-04-25 10:44:03 +01001358 UNREACHABLE("%s", OpcodeName(type.opcode()).c_str());
Chris Forbese6419ad2019-04-11 12:23:10 -07001359 }
1360 }
1361
1362 template<typename F>
1363 void SpirvShader::VisitMemoryObject(sw::SpirvShader::Object::ID id, F f) const
1364 {
1365 auto typeId = getObject(id).type;
1366 auto const & type = getType(typeId);
1367 if (!IsStorageInterleavedByLane(type.storageClass)) // TODO: really "is explicit layout"
1368 {
1369 Decorations d{};
1370 ApplyDecorationsForId(&d, id);
1371 uint32_t index = 0;
1372 VisitMemoryObjectInner<F>(typeId, d, index, 0, f);
1373 }
1374 else
1375 {
1376 // Objects without explicit layout are tightly packed.
1377 for (auto i = 0u; i < getType(type.element).sizeInComponents; i++)
1378 {
Ben Clayton97035bd2019-04-16 11:35:38 -04001379 f(i, i * sizeof(float));
Chris Forbese6419ad2019-04-11 12:23:10 -07001380 }
1381 }
1382 }
1383
Ben Clayton3d497382019-04-08 16:16:12 -04001384 SIMD::Pointer SpirvShader::GetPointerToData(Object::ID id, int arrayIndex, SpirvRoutine *routine) const
Ben Clayton484e08e2019-04-05 12:11:39 +01001385 {
1386 auto &object = getObject(id);
1387 switch (object.kind)
1388 {
Ben Clayton1d514f32019-04-19 16:11:18 -04001389 case Object::Kind::Pointer:
Ben Clayton484e08e2019-04-05 12:11:39 +01001390 case Object::Kind::InterfaceVariable:
Ben Clayton5f7e9112019-04-16 11:03:40 -04001391 return routine->getPointer(id);
Ben Clayton484e08e2019-04-05 12:11:39 +01001392
Ben Clayton6b511342019-04-05 12:12:30 +01001393 case Object::Kind::DescriptorSet:
1394 {
Nicolas Capens82eb22e2019-04-10 01:15:43 -04001395 const auto &d = descriptorDecorations.at(id);
1396 ASSERT(d.DescriptorSet >= 0 && d.DescriptorSet < vk::MAX_BOUND_DESCRIPTOR_SETS);
Ben Clayton6b511342019-04-05 12:12:30 +01001397 ASSERT(d.Binding >= 0);
1398
1399 auto set = routine->getPointer(id);
Ben Clayton5f7e9112019-04-16 11:03:40 -04001400
Ben Clayton6b511342019-04-05 12:12:30 +01001401 auto setLayout = routine->pipelineLayout->getDescriptorSetLayout(d.DescriptorSet);
Ben Clayton8c56e8d2019-04-25 08:24:01 +01001402 ASSERT_MSG(setLayout->hasBinding(d.Binding), "Descriptor set %d does not contain binding %d", int(d.DescriptorSet), int(d.Binding));
Alexis Hetu5078d482019-04-10 15:00:25 -04001403 int bindingOffset = static_cast<int>(setLayout->getBindingOffset(d.Binding, arrayIndex));
Ben Clayton6b511342019-04-05 12:12:30 +01001404
Chris Forbesbfbdd892019-04-27 12:11:29 -07001405 Pointer<Byte> descriptor = set.base + bindingOffset; // BufferDescriptor*
1406 Pointer<Byte> data = *Pointer<Pointer<Byte>>(descriptor + OFFSET(vk::BufferDescriptor, ptr)); // void*
1407 Int size = *Pointer<Int>(descriptor + OFFSET(vk::BufferDescriptor, sizeInBytes));
Ben Clayton6b511342019-04-05 12:12:30 +01001408 if (setLayout->isBindingDynamic(d.Binding))
1409 {
1410 uint32_t dynamicBindingIndex =
1411 routine->pipelineLayout->getDynamicOffsetBase(d.DescriptorSet) +
1412 setLayout->getDynamicDescriptorOffset(d.Binding) +
1413 arrayIndex;
Chris Forbesbfbdd892019-04-27 12:11:29 -07001414 Int offset = routine->descriptorDynamicOffsets[dynamicBindingIndex];
1415 Int robustnessSize = *Pointer<Int>(descriptor + OFFSET(vk::BufferDescriptor, robustnessSize));
1416 return SIMD::Pointer(data + offset, Min(size, robustnessSize - offset));
Ben Clayton6b511342019-04-05 12:12:30 +01001417 }
Chris Forbesbfbdd892019-04-27 12:11:29 -07001418 else
1419 {
1420 return SIMD::Pointer(data, size);
1421 }
Ben Clayton6b511342019-04-05 12:12:30 +01001422 }
1423
Ben Clayton484e08e2019-04-05 12:11:39 +01001424 default:
1425 UNREACHABLE("Invalid pointer kind %d", int(object.kind));
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04001426 return SIMD::Pointer(Pointer<Byte>(), 0);
Ben Clayton484e08e2019-04-05 12:11:39 +01001427 }
1428 }
1429
Chris Forbes3610ded2019-04-22 18:12:13 -07001430 void SpirvShader::ApplyDecorationsForAccessChain(Decorations *d, DescriptorDecorations *dd, Object::ID baseId, uint32_t numIndexes, uint32_t const *indexIds) const
Chris Forbese6419ad2019-04-11 12:23:10 -07001431 {
1432 ApplyDecorationsForId(d, baseId);
1433 auto &baseObject = getObject(baseId);
1434 ApplyDecorationsForId(d, baseObject.type);
1435 auto typeId = getType(baseObject.type).element;
1436
1437 for (auto i = 0u; i < numIndexes; i++)
1438 {
1439 ApplyDecorationsForId(d, typeId);
1440 auto & type = getType(typeId);
1441 switch (type.opcode())
1442 {
1443 case spv::OpTypeStruct:
1444 {
1445 int memberIndex = GetConstantInt(indexIds[i]);
1446 ApplyDecorationsForIdMember(d, typeId, memberIndex);
1447 typeId = type.definition.word(2u + memberIndex);
1448 break;
1449 }
1450 case spv::OpTypeArray:
1451 case spv::OpTypeRuntimeArray:
Chris Forbes3610ded2019-04-22 18:12:13 -07001452 if (dd->InputAttachmentIndex >= 0)
1453 {
1454 dd->InputAttachmentIndex += GetConstantInt(indexIds[i]);
1455 }
1456 typeId = type.element;
1457 break;
Chris Forbese6419ad2019-04-11 12:23:10 -07001458 case spv::OpTypeVector:
1459 typeId = type.element;
1460 break;
Chris Forbes98e6b962019-04-12 11:58:58 -07001461 case spv::OpTypeMatrix:
1462 typeId = type.element;
1463 d->InsideMatrix = true;
1464 break;
Chris Forbese6419ad2019-04-11 12:23:10 -07001465 default:
Ben Clayton92797c22019-04-25 10:44:03 +01001466 UNREACHABLE("%s", OpcodeName(type.definition.opcode()).c_str());
Chris Forbese6419ad2019-04-11 12:23:10 -07001467 }
1468 }
1469 }
1470
Nicolas Capens82eb22e2019-04-10 01:15:43 -04001471 SIMD::Pointer SpirvShader::WalkExplicitLayoutAccessChain(Object::ID baseId, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const
Chris Forbesa30de542019-03-18 18:51:55 -07001472 {
1473 // Produce a offset into external memory in sizeof(float) units
1474
Nicolas Capens82eb22e2019-04-10 01:15:43 -04001475 auto &baseObject = getObject(baseId);
Chris Forbesa30de542019-03-18 18:51:55 -07001476 Type::ID typeId = getType(baseObject.type).element;
Ben Clayton484e08e2019-04-05 12:11:39 +01001477 Decorations d = {};
Chris Forbesfe1dd4b2019-03-19 09:06:19 -07001478 ApplyDecorationsForId(&d, baseObject.type);
Chris Forbesa30de542019-03-18 18:51:55 -07001479
Alexis Hetu5078d482019-04-10 15:00:25 -04001480 uint32_t arrayIndex = 0;
Ben Clayton6b511342019-04-05 12:12:30 +01001481 if (baseObject.kind == Object::Kind::DescriptorSet)
1482 {
1483 auto type = getType(typeId).definition.opcode();
1484 if (type == spv::OpTypeArray || type == spv::OpTypeRuntimeArray)
1485 {
1486 ASSERT(getObject(indexIds[0]).kind == Object::Kind::Constant);
1487 arrayIndex = GetConstantInt(indexIds[0]);
1488
1489 numIndexes--;
1490 indexIds++;
1491 typeId = getType(typeId).element;
1492 }
1493 }
1494
Nicolas Capens82eb22e2019-04-10 01:15:43 -04001495 auto ptr = GetPointerToData(baseId, arrayIndex, routine);
Ben Clayton484e08e2019-04-05 12:11:39 +01001496
1497 int constantOffset = 0;
Chris Forbesa30de542019-03-18 18:51:55 -07001498
1499 for (auto i = 0u; i < numIndexes; i++)
1500 {
1501 auto & type = getType(typeId);
Chris Forbese6419ad2019-04-11 12:23:10 -07001502 ApplyDecorationsForId(&d, typeId);
1503
Chris Forbesa30de542019-03-18 18:51:55 -07001504 switch (type.definition.opcode())
1505 {
1506 case spv::OpTypeStruct:
1507 {
1508 int memberIndex = GetConstantInt(indexIds[i]);
Chris Forbesa30de542019-03-18 18:51:55 -07001509 ApplyDecorationsForIdMember(&d, typeId, memberIndex);
1510 ASSERT(d.HasOffset);
Ben Clayton97035bd2019-04-16 11:35:38 -04001511 constantOffset += d.Offset;
Chris Forbesa30de542019-03-18 18:51:55 -07001512 typeId = type.definition.word(2u + memberIndex);
1513 break;
1514 }
1515 case spv::OpTypeArray:
1516 case spv::OpTypeRuntimeArray:
1517 {
1518 // TODO: b/127950082: Check bounds.
Chris Forbesa30de542019-03-18 18:51:55 -07001519 ASSERT(d.HasArrayStride);
1520 auto & obj = getObject(indexIds[i]);
1521 if (obj.kind == Object::Kind::Constant)
Ben Clayton5f7e9112019-04-16 11:03:40 -04001522 {
Ben Clayton97035bd2019-04-16 11:35:38 -04001523 constantOffset += d.ArrayStride * GetConstantInt(indexIds[i]);
Ben Clayton5f7e9112019-04-16 11:03:40 -04001524 }
Chris Forbesa30de542019-03-18 18:51:55 -07001525 else
Ben Clayton5f7e9112019-04-16 11:03:40 -04001526 {
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04001527 ptr += SIMD::Int(d.ArrayStride) * routine->getIntermediate(indexIds[i]).Int(0);
Ben Clayton5f7e9112019-04-16 11:03:40 -04001528 }
Chris Forbesa30de542019-03-18 18:51:55 -07001529 typeId = type.element;
1530 break;
1531 }
1532 case spv::OpTypeMatrix:
1533 {
1534 // TODO: b/127950082: Check bounds.
Chris Forbesa30de542019-03-18 18:51:55 -07001535 ASSERT(d.HasMatrixStride);
Chris Forbes98e6b962019-04-12 11:58:58 -07001536 d.InsideMatrix = true;
Ben Clayton97035bd2019-04-16 11:35:38 -04001537 auto columnStride = (d.HasRowMajor && d.RowMajor) ? sizeof(float) : d.MatrixStride;
Chris Forbesa30de542019-03-18 18:51:55 -07001538 auto & obj = getObject(indexIds[i]);
1539 if (obj.kind == Object::Kind::Constant)
Ben Clayton5f7e9112019-04-16 11:03:40 -04001540 {
Chris Forbes98e6b962019-04-12 11:58:58 -07001541 constantOffset += columnStride * GetConstantInt(indexIds[i]);
Ben Clayton5f7e9112019-04-16 11:03:40 -04001542 }
Chris Forbesa30de542019-03-18 18:51:55 -07001543 else
Ben Clayton5f7e9112019-04-16 11:03:40 -04001544 {
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04001545 ptr += SIMD::Int(columnStride) * routine->getIntermediate(indexIds[i]).Int(0);
Ben Clayton5f7e9112019-04-16 11:03:40 -04001546 }
Chris Forbesa30de542019-03-18 18:51:55 -07001547 typeId = type.element;
1548 break;
1549 }
1550 case spv::OpTypeVector:
1551 {
Ben Clayton97035bd2019-04-16 11:35:38 -04001552 auto elemStride = (d.InsideMatrix && d.HasRowMajor && d.RowMajor) ? d.MatrixStride : sizeof(float);
Chris Forbesa30de542019-03-18 18:51:55 -07001553 auto & obj = getObject(indexIds[i]);
1554 if (obj.kind == Object::Kind::Constant)
Ben Clayton5f7e9112019-04-16 11:03:40 -04001555 {
Chris Forbes98e6b962019-04-12 11:58:58 -07001556 constantOffset += elemStride * GetConstantInt(indexIds[i]);
Ben Clayton5f7e9112019-04-16 11:03:40 -04001557 }
Chris Forbesa30de542019-03-18 18:51:55 -07001558 else
Ben Clayton5f7e9112019-04-16 11:03:40 -04001559 {
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04001560 ptr += SIMD::Int(elemStride) * routine->getIntermediate(indexIds[i]).Int(0);
Ben Clayton5f7e9112019-04-16 11:03:40 -04001561 }
Chris Forbesa30de542019-03-18 18:51:55 -07001562 typeId = type.element;
1563 break;
1564 }
1565 default:
Ben Clayton92797c22019-04-25 10:44:03 +01001566 UNREACHABLE("%s", OpcodeName(type.definition.opcode()).c_str());
Chris Forbesa30de542019-03-18 18:51:55 -07001567 }
1568 }
1569
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04001570 ptr += constantOffset;
Ben Clayton3d497382019-04-08 16:16:12 -04001571 return ptr;
Chris Forbesa30de542019-03-18 18:51:55 -07001572 }
1573
Ben Clayton5f7e9112019-04-16 11:03:40 -04001574 SIMD::Pointer SpirvShader::WalkAccessChain(Object::ID baseId, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const
Chris Forbes38f85b32019-02-12 20:10:05 +00001575 {
Chris Forbes38f85b32019-02-12 20:10:05 +00001576 // TODO: avoid doing per-lane work in some cases if we can?
1577
Nicolas Capens82eb22e2019-04-10 01:15:43 -04001578 auto &baseObject = getObject(baseId);
Ben Claytonaf973b62019-03-13 18:19:20 +00001579 Type::ID typeId = getType(baseObject.type).element;
Chris Forbes38f85b32019-02-12 20:10:05 +00001580
Ben Clayton5f7e9112019-04-16 11:03:40 -04001581 auto ptr = routine->getPointer(baseId);
1582
1583 int constantOffset = 0;
Chris Forbes38f85b32019-02-12 20:10:05 +00001584
1585 for (auto i = 0u; i < numIndexes; i++)
1586 {
1587 auto & type = getType(typeId);
Nicolas Capens29090852019-03-19 16:22:35 -04001588 switch(type.opcode())
Chris Forbes38f85b32019-02-12 20:10:05 +00001589 {
1590 case spv::OpTypeStruct:
1591 {
1592 int memberIndex = GetConstantInt(indexIds[i]);
1593 int offsetIntoStruct = 0;
1594 for (auto j = 0; j < memberIndex; j++) {
Chris Forbes58bee562019-02-19 17:41:41 -08001595 auto memberType = type.definition.word(2u + j);
Ben Clayton97035bd2019-04-16 11:35:38 -04001596 offsetIntoStruct += getType(memberType).sizeInComponents * sizeof(float);
Chris Forbes38f85b32019-02-12 20:10:05 +00001597 }
Chris Forbes6397ed02019-02-15 16:39:17 -08001598 constantOffset += offsetIntoStruct;
Chris Forbes58bee562019-02-19 17:41:41 -08001599 typeId = type.definition.word(2u + memberIndex);
Chris Forbes38f85b32019-02-12 20:10:05 +00001600 break;
1601 }
1602
1603 case spv::OpTypeVector:
1604 case spv::OpTypeMatrix:
1605 case spv::OpTypeArray:
Ben Claytonfa8603c2019-03-08 16:51:42 +00001606 case spv::OpTypeRuntimeArray:
Chris Forbes38f85b32019-02-12 20:10:05 +00001607 {
Ben Claytonfa8603c2019-03-08 16:51:42 +00001608 // TODO: b/127950082: Check bounds.
Chris Forbes0b092cd2019-04-19 09:02:14 -07001609 if (getType(baseObject.type).storageClass == spv::StorageClassUniformConstant)
Ben Clayton5f7e9112019-04-16 11:03:40 -04001610 {
Chris Forbes0b092cd2019-04-19 09:02:14 -07001611 // indexing into an array of descriptors.
1612 auto &obj = getObject(indexIds[i]);
1613 if (obj.kind != Object::Kind::Constant)
1614 {
Ben Clayton92797c22019-04-25 10:44:03 +01001615 UNSUPPORTED("SPIR-V SampledImageArrayDynamicIndexing Capability");
Chris Forbes0b092cd2019-04-19 09:02:14 -07001616 }
1617
1618 auto d = descriptorDecorations.at(baseId);
1619 ASSERT(d.DescriptorSet >= 0);
1620 ASSERT(d.Binding >= 0);
1621 auto setLayout = routine->pipelineLayout->getDescriptorSetLayout(d.DescriptorSet);
1622 auto stride = setLayout->getBindingStride(d.Binding);
1623 ptr.base += stride * GetConstantInt(indexIds[i]);
Ben Clayton5f7e9112019-04-16 11:03:40 -04001624 }
Chris Forbes38f85b32019-02-12 20:10:05 +00001625 else
Ben Clayton5f7e9112019-04-16 11:03:40 -04001626 {
Chris Forbes0b092cd2019-04-19 09:02:14 -07001627 auto stride = getType(type.element).sizeInComponents * sizeof(float);
1628 auto & obj = getObject(indexIds[i]);
1629 if (obj.kind == Object::Kind::Constant)
1630 {
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04001631 ptr += stride * GetConstantInt(indexIds[i]);
Chris Forbes0b092cd2019-04-19 09:02:14 -07001632 }
1633 else
1634 {
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04001635 ptr += SIMD::Int(stride) * routine->getIntermediate(indexIds[i]).Int(0);
Chris Forbes0b092cd2019-04-19 09:02:14 -07001636 }
Ben Clayton5f7e9112019-04-16 11:03:40 -04001637 }
Ben Clayton9a162482019-02-25 11:54:43 +00001638 typeId = type.element;
Chris Forbes38f85b32019-02-12 20:10:05 +00001639 break;
1640 }
1641
1642 default:
Ben Clayton92797c22019-04-25 10:44:03 +01001643 UNREACHABLE("%s", OpcodeName(type.opcode()).c_str());
Chris Forbes38f85b32019-02-12 20:10:05 +00001644 }
1645 }
1646
Ben Clayton5f7e9112019-04-16 11:03:40 -04001647 if (constantOffset != 0)
1648 {
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04001649 ptr += constantOffset;
Ben Clayton5f7e9112019-04-16 11:03:40 -04001650 }
1651 return ptr;
Chris Forbes38f85b32019-02-12 20:10:05 +00001652 }
1653
Ben Claytonaf973b62019-03-13 18:19:20 +00001654 uint32_t SpirvShader::WalkLiteralAccessChain(Type::ID typeId, uint32_t numIndexes, uint32_t const *indexes) const
Chris Forbes9638b942019-02-21 18:39:31 -08001655 {
Ben Clayton97035bd2019-04-16 11:35:38 -04001656 uint32_t componentOffset = 0;
Chris Forbes9638b942019-02-21 18:39:31 -08001657
1658 for (auto i = 0u; i < numIndexes; i++)
1659 {
1660 auto & type = getType(typeId);
Nicolas Capens29090852019-03-19 16:22:35 -04001661 switch(type.opcode())
Chris Forbes9638b942019-02-21 18:39:31 -08001662 {
1663 case spv::OpTypeStruct:
1664 {
1665 int memberIndex = indexes[i];
1666 int offsetIntoStruct = 0;
1667 for (auto j = 0; j < memberIndex; j++) {
1668 auto memberType = type.definition.word(2u + j);
1669 offsetIntoStruct += getType(memberType).sizeInComponents;
1670 }
Ben Clayton97035bd2019-04-16 11:35:38 -04001671 componentOffset += offsetIntoStruct;
Chris Forbes9638b942019-02-21 18:39:31 -08001672 typeId = type.definition.word(2u + memberIndex);
1673 break;
1674 }
1675
1676 case spv::OpTypeVector:
1677 case spv::OpTypeMatrix:
1678 case spv::OpTypeArray:
1679 {
1680 auto elementType = type.definition.word(2);
1681 auto stride = getType(elementType).sizeInComponents;
Ben Clayton97035bd2019-04-16 11:35:38 -04001682 componentOffset += stride * indexes[i];
Chris Forbes9638b942019-02-21 18:39:31 -08001683 typeId = elementType;
1684 break;
1685 }
1686
1687 default:
Ben Clayton92797c22019-04-25 10:44:03 +01001688 UNREACHABLE("%s", OpcodeName(type.opcode()).c_str());
Chris Forbes9638b942019-02-21 18:39:31 -08001689 }
1690 }
1691
Ben Clayton97035bd2019-04-16 11:35:38 -04001692 return componentOffset;
Chris Forbes9638b942019-02-21 18:39:31 -08001693 }
1694
Chris Forbesc25b8072018-12-10 15:10:39 -08001695 void SpirvShader::Decorations::Apply(spv::Decoration decoration, uint32_t arg)
1696 {
1697 switch (decoration)
1698 {
1699 case spv::DecorationLocation:
1700 HasLocation = true;
1701 Location = static_cast<int32_t>(arg);
1702 break;
1703 case spv::DecorationComponent:
1704 HasComponent = true;
1705 Component = arg;
1706 break;
1707 case spv::DecorationBuiltIn:
1708 HasBuiltIn = true;
1709 BuiltIn = static_cast<spv::BuiltIn>(arg);
1710 break;
1711 case spv::DecorationFlat:
1712 Flat = true;
1713 break;
1714 case spv::DecorationNoPerspective:
Chris Forbes5839dcf2018-12-10 19:02:58 -08001715 NoPerspective = true;
Chris Forbesc25b8072018-12-10 15:10:39 -08001716 break;
1717 case spv::DecorationCentroid:
1718 Centroid = true;
1719 break;
1720 case spv::DecorationBlock:
1721 Block = true;
1722 break;
1723 case spv::DecorationBufferBlock:
1724 BufferBlock = true;
1725 break;
Chris Forbes65321072019-03-07 16:13:56 -08001726 case spv::DecorationOffset:
1727 HasOffset = true;
1728 Offset = static_cast<int32_t>(arg);
1729 break;
1730 case spv::DecorationArrayStride:
1731 HasArrayStride = true;
1732 ArrayStride = static_cast<int32_t>(arg);
1733 break;
1734 case spv::DecorationMatrixStride:
1735 HasMatrixStride = true;
1736 MatrixStride = static_cast<int32_t>(arg);
1737 break;
Ben Clayton8448cc52019-04-09 16:24:31 -04001738 case spv::DecorationRelaxedPrecision:
1739 RelaxedPrecision = true;
1740 break;
Chris Forbes1ba5ba72019-04-12 11:37:21 -07001741 case spv::DecorationRowMajor:
1742 HasRowMajor = true;
1743 RowMajor = true;
1744 break;
1745 case spv::DecorationColMajor:
1746 HasRowMajor = true;
1747 RowMajor = false;
Chris Forbesc25b8072018-12-10 15:10:39 -08001748 default:
1749 // Intentionally partial, there are many decorations we just don't care about.
1750 break;
1751 }
1752 }
1753
1754 void SpirvShader::Decorations::Apply(const sw::SpirvShader::Decorations &src)
1755 {
1756 // Apply a decoration group to this set of decorations
1757 if (src.HasBuiltIn)
1758 {
1759 HasBuiltIn = true;
1760 BuiltIn = src.BuiltIn;
1761 }
1762
1763 if (src.HasLocation)
1764 {
1765 HasLocation = true;
1766 Location = src.Location;
1767 }
1768
1769 if (src.HasComponent)
1770 {
1771 HasComponent = true;
1772 Component = src.Component;
1773 }
1774
Chris Forbes65321072019-03-07 16:13:56 -08001775 if (src.HasOffset)
1776 {
1777 HasOffset = true;
1778 Offset = src.Offset;
1779 }
1780
1781 if (src.HasArrayStride)
1782 {
1783 HasArrayStride = true;
1784 ArrayStride = src.ArrayStride;
1785 }
1786
1787 if (src.HasMatrixStride)
1788 {
1789 HasMatrixStride = true;
1790 MatrixStride = src.MatrixStride;
1791 }
1792
Chris Forbes1ba5ba72019-04-12 11:37:21 -07001793 if (src.HasRowMajor)
1794 {
1795 HasRowMajor = true;
1796 RowMajor = src.RowMajor;
1797 }
1798
Chris Forbesc25b8072018-12-10 15:10:39 -08001799 Flat |= src.Flat;
Chris Forbes5839dcf2018-12-10 19:02:58 -08001800 NoPerspective |= src.NoPerspective;
Chris Forbesc25b8072018-12-10 15:10:39 -08001801 Centroid |= src.Centroid;
1802 Block |= src.Block;
1803 BufferBlock |= src.BufferBlock;
Ben Clayton8448cc52019-04-09 16:24:31 -04001804 RelaxedPrecision |= src.RelaxedPrecision;
Chris Forbes98e6b962019-04-12 11:58:58 -07001805 InsideMatrix |= src.InsideMatrix;
Chris Forbesc25b8072018-12-10 15:10:39 -08001806 }
Chris Forbesbc3a0ee2018-12-27 16:02:58 -08001807
Nicolas Capens82eb22e2019-04-10 01:15:43 -04001808 void SpirvShader::DescriptorDecorations::Apply(const sw::SpirvShader::DescriptorDecorations &src)
1809 {
1810 if(src.DescriptorSet >= 0)
1811 {
1812 DescriptorSet = src.DescriptorSet;
1813 }
1814
1815 if(src.Binding >= 0)
1816 {
1817 Binding = src.Binding;
1818 }
Chris Forbes24466042019-04-22 10:54:23 -07001819
1820 if (src.InputAttachmentIndex >= 0)
1821 {
1822 InputAttachmentIndex = src.InputAttachmentIndex;
1823 }
Nicolas Capens82eb22e2019-04-10 01:15:43 -04001824 }
1825
Ben Claytonab51bbf2019-02-20 14:36:27 +00001826 void SpirvShader::ApplyDecorationsForId(Decorations *d, TypeOrObjectID id) const
Chris Forbes49d664d2019-02-12 19:24:50 +00001827 {
1828 auto it = decorations.find(id);
1829 if (it != decorations.end())
1830 d->Apply(it->second);
1831 }
1832
Ben Claytonaf973b62019-03-13 18:19:20 +00001833 void SpirvShader::ApplyDecorationsForIdMember(Decorations *d, Type::ID id, uint32_t member) const
Chris Forbes49d664d2019-02-12 19:24:50 +00001834 {
1835 auto it = memberDecorations.find(id);
1836 if (it != memberDecorations.end() && member < it->second.size())
1837 {
1838 d->Apply(it->second[member]);
1839 }
1840 }
1841
Nicolas Capens82eb22e2019-04-10 01:15:43 -04001842 void SpirvShader::DefineResult(const InsnIterator &insn)
1843 {
1844 Type::ID typeId = insn.word(1);
1845 Object::ID resultId = insn.word(2);
1846 auto &object = defs[resultId];
1847 object.type = typeId;
Chris Forbes0b092cd2019-04-19 09:02:14 -07001848
1849 switch (getType(typeId).opcode())
1850 {
1851 case spv::OpTypePointer:
1852 case spv::OpTypeImage:
1853 case spv::OpTypeSampledImage:
1854 case spv::OpTypeSampler:
Ben Clayton1d514f32019-04-19 16:11:18 -04001855 object.kind = Object::Kind::Pointer;
Chris Forbes0b092cd2019-04-19 09:02:14 -07001856 break;
1857
1858 default:
1859 object.kind = Object::Kind::Intermediate;
1860 }
1861
Nicolas Capens82eb22e2019-04-10 01:15:43 -04001862 object.definition = insn;
1863 }
1864
Ben Claytonaf973b62019-03-13 18:19:20 +00001865 uint32_t SpirvShader::GetConstantInt(Object::ID id) const
Chris Forbesbc3a0ee2018-12-27 16:02:58 -08001866 {
1867 // Slightly hackish access to constants very early in translation.
1868 // General consumption of constants by other instructions should
1869 // probably be just lowered to Reactor.
1870
1871 // TODO: not encountered yet since we only use this for array sizes etc,
1872 // but is possible to construct integer constant 0 via OpConstantNull.
Chris Forbesb8fb08a2019-02-13 11:45:27 -08001873 auto insn = getObject(id).definition;
Ben Clayton6fae32c2019-02-28 20:06:42 +00001874 ASSERT(insn.opcode() == spv::OpConstant);
Nicolas Capens29090852019-03-19 16:22:35 -04001875 ASSERT(getType(insn.word(1)).opcode() == spv::OpTypeInt);
Chris Forbesbc3a0ee2018-12-27 16:02:58 -08001876 return insn.word(3);
1877 }
Chris Forbesd5aed492019-02-02 15:18:52 -08001878
1879 // emit-time
1880
Chris Forbesc61271e2019-02-19 17:01:28 -08001881 void SpirvShader::emitProlog(SpirvRoutine *routine) const
Chris Forbesd5aed492019-02-02 15:18:52 -08001882 {
1883 for (auto insn : *this)
1884 {
1885 switch (insn.opcode())
1886 {
1887 case spv::OpVariable:
1888 {
Nicolas Capens29090852019-03-19 16:22:35 -04001889 Type::ID resultPointerTypeId = insn.word(1);
1890 auto resultPointerType = getType(resultPointerTypeId);
1891 auto pointeeType = getType(resultPointerType.element);
1892
1893 if(pointeeType.sizeInComponents > 0) // TODO: what to do about zero-slot objects?
Chris Forbesd5aed492019-02-02 15:18:52 -08001894 {
Nicolas Capens29090852019-03-19 16:22:35 -04001895 Object::ID resultId = insn.word(2);
Ben Clayton47747612019-04-04 16:27:35 +01001896 routine->createVariable(resultId, pointeeType.sizeInComponents);
Chris Forbesd5aed492019-02-02 15:18:52 -08001897 }
1898 break;
1899 }
1900 default:
Chris Forbese9f8f5b2019-02-11 00:20:16 +00001901 // Nothing else produces interface variables, so can all be safely ignored.
Chris Forbesd5aed492019-02-02 15:18:52 -08001902 break;
1903 }
1904 }
1905 }
1906
Nicolas Capens09591b82019-04-08 22:51:08 -04001907 void SpirvShader::emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask, const vk::DescriptorSet::Bindings &descriptorSets) const
Chris Forbesd5aed492019-02-02 15:18:52 -08001908 {
Nicolas Capens09591b82019-04-08 22:51:08 -04001909 EmitState state(routine, activeLaneMask, descriptorSets);
Ben Claytonc0cf68b2019-03-21 17:46:08 +00001910
Ben Clayton9b156612019-03-13 19:48:31 +00001911 // Emit everything up to the first label
1912 // TODO: Separate out dispatch of block from non-block instructions?
Chris Forbesd5aed492019-02-02 15:18:52 -08001913 for (auto insn : *this)
1914 {
Ben Clayton9b156612019-03-13 19:48:31 +00001915 if (insn.opcode() == spv::OpLabel)
Chris Forbesd5aed492019-02-02 15:18:52 -08001916 {
Chris Forbesd5aed492019-02-02 15:18:52 -08001917 break;
1918 }
Ben Claytonc0cf68b2019-03-21 17:46:08 +00001919 EmitInstruction(insn, &state);
Ben Clayton9b156612019-03-13 19:48:31 +00001920 }
1921
Ben Clayton60f15ec2019-05-09 17:50:01 +01001922 // Emit all the blocks starting from entryPointBlockId.
1923 EmitBlocks(entryPointBlockId, &state);
Ben Clayton513ed1d2019-03-28 16:07:00 +00001924 }
1925
1926 void SpirvShader::EmitBlocks(Block::ID id, EmitState *state, Block::ID ignore /* = 0 */) const
1927 {
1928 auto oldPending = state->pending;
1929
Ben Claytonc0cf68b2019-03-21 17:46:08 +00001930 std::queue<Block::ID> pending;
Ben Clayton513ed1d2019-03-28 16:07:00 +00001931 state->pending = &pending;
1932 pending.push(id);
Ben Claytonc0cf68b2019-03-21 17:46:08 +00001933 while (pending.size() > 0)
Ben Clayton9b156612019-03-13 19:48:31 +00001934 {
Ben Claytonc0cf68b2019-03-21 17:46:08 +00001935 auto id = pending.front();
1936 pending.pop();
Ben Clayton513ed1d2019-03-28 16:07:00 +00001937
1938 auto const &block = getBlock(id);
1939 if (id == ignore)
Ben Claytonc0cf68b2019-03-21 17:46:08 +00001940 {
Ben Clayton513ed1d2019-03-28 16:07:00 +00001941 continue;
1942 }
1943
1944 state->currentBlock = id;
1945
1946 switch (block.kind)
1947 {
1948 case Block::Simple:
1949 case Block::StructuredBranchConditional:
1950 case Block::UnstructuredBranchConditional:
1951 case Block::StructuredSwitch:
1952 case Block::UnstructuredSwitch:
1953 EmitNonLoop(state);
1954 break;
1955
1956 case Block::Loop:
1957 EmitLoop(state);
1958 break;
1959
1960 default:
1961 UNREACHABLE("Unexpected Block Kind: %d", int(block.kind));
Ben Claytonc0cf68b2019-03-21 17:46:08 +00001962 }
Ben Clayton9b156612019-03-13 19:48:31 +00001963 }
Ben Clayton9b156612019-03-13 19:48:31 +00001964
Ben Clayton513ed1d2019-03-28 16:07:00 +00001965 state->pending = oldPending;
Ben Claytonc0cf68b2019-03-21 17:46:08 +00001966 }
1967
1968 void SpirvShader::EmitInstructions(InsnIterator begin, InsnIterator end, EmitState *state) const
1969 {
1970 for (auto insn = begin; insn != end; insn++)
1971 {
1972 auto res = EmitInstruction(insn, state);
1973 switch (res)
1974 {
1975 case EmitResult::Continue:
1976 continue;
1977 case EmitResult::Terminator:
1978 break;
1979 default:
1980 UNREACHABLE("Unexpected EmitResult %d", int(res));
1981 break;
1982 }
1983 }
1984 }
1985
Ben Clayton513ed1d2019-03-28 16:07:00 +00001986 void SpirvShader::EmitNonLoop(EmitState *state) const
1987 {
1988 auto blockId = state->currentBlock;
1989 auto block = getBlock(blockId);
1990
1991 // Ensure all incoming blocks have been generated.
1992 auto depsDone = true;
1993 for (auto in : block.ins)
1994 {
1995 if (state->visited.count(in) == 0)
1996 {
1997 state->pending->emplace(in);
1998 depsDone = false;
1999 }
2000 }
2001
2002 if (!depsDone)
2003 {
2004 // come back to this once the dependencies have been generated
2005 state->pending->emplace(blockId);
2006 return;
2007 }
2008
2009 if (!state->visited.emplace(blockId).second)
2010 {
2011 return; // Already generated this block.
2012 }
2013
Ben Clayton60f15ec2019-05-09 17:50:01 +01002014 if (blockId != entryPointBlockId)
Ben Clayton513ed1d2019-03-28 16:07:00 +00002015 {
2016 // Set the activeLaneMask.
Nicolas Capens459453a2019-03-27 15:27:27 -04002017 SIMD::Int activeLaneMask(0);
Ben Clayton513ed1d2019-03-28 16:07:00 +00002018 for (auto in : block.ins)
2019 {
2020 auto inMask = GetActiveLaneMaskEdge(state, in, blockId);
Nicolas Capens459453a2019-03-27 15:27:27 -04002021 activeLaneMask |= inMask;
Ben Clayton513ed1d2019-03-28 16:07:00 +00002022 }
Nicolas Capens459453a2019-03-27 15:27:27 -04002023 state->setActiveLaneMask(activeLaneMask);
Ben Clayton513ed1d2019-03-28 16:07:00 +00002024 }
2025
2026 EmitInstructions(block.begin(), block.end(), state);
2027
2028 for (auto out : block.outs)
2029 {
2030 state->pending->emplace(out);
2031 }
2032 }
2033
Ben Claytone747b3c2019-03-21 19:35:15 +00002034 void SpirvShader::EmitLoop(EmitState *state) const
2035 {
2036 auto blockId = state->currentBlock;
2037 auto block = getBlock(blockId);
2038
Ben Clayton513ed1d2019-03-28 16:07:00 +00002039 // Ensure all incoming non-back edge blocks have been generated.
2040 auto depsDone = true;
2041 for (auto in : block.ins)
2042 {
2043 if (state->visited.count(in) == 0)
2044 {
2045 if (!existsPath(blockId, in, block.mergeBlock)) // if not a loop back edge
2046 {
2047 state->pending->emplace(in);
2048 depsDone = false;
2049 }
2050 }
2051 }
2052
2053 if (!depsDone)
2054 {
2055 // come back to this once the dependencies have been generated
2056 state->pending->emplace(blockId);
2057 return;
2058 }
2059
2060 if (!state->visited.emplace(blockId).second)
2061 {
2062 return; // Already emitted this loop.
2063 }
2064
Ben Claytone747b3c2019-03-21 19:35:15 +00002065 // loopActiveLaneMask is the mask of lanes that are continuing to loop.
2066 // This is initialized with the incoming active lane masks.
2067 SIMD::Int loopActiveLaneMask = SIMD::Int(0);
2068 for (auto in : block.ins)
2069 {
Ben Clayton513ed1d2019-03-28 16:07:00 +00002070 if (!existsPath(blockId, in, block.mergeBlock)) // if not a loop back edge
Ben Claytone747b3c2019-03-21 19:35:15 +00002071 {
Ben Claytonfe3f0132019-03-26 11:10:16 +00002072 loopActiveLaneMask |= GetActiveLaneMaskEdge(state, in, blockId);
Ben Claytone747b3c2019-03-21 19:35:15 +00002073 }
2074 }
2075
2076 // Generate an alloca for each of the loop's phis.
2077 // These will be primed with the incoming, non back edge Phi values
2078 // before the loop, and then updated just before the loop jumps back to
2079 // the block.
2080 struct LoopPhi
2081 {
Nicolas Capens5da8d8d2019-03-27 14:45:34 -04002082 LoopPhi(Object::ID id, uint32_t size) : phiId(id), storage(size) {}
2083
Ben Claytone747b3c2019-03-21 19:35:15 +00002084 Object::ID phiId; // The Phi identifier.
2085 Object::ID continueValue; // The source merge value from the loop.
2086 Array<SIMD::Int> storage; // The alloca.
2087 };
2088
2089 std::vector<LoopPhi> phis;
2090
2091 // For each OpPhi between the block start and the merge instruction:
2092 for (auto insn = block.begin(); insn != block.mergeInstruction; insn++)
2093 {
2094 if (insn.opcode() == spv::OpPhi)
2095 {
2096 auto objectId = Object::ID(insn.word(2));
2097 auto &object = getObject(objectId);
2098 auto &type = getType(object.type);
2099
Nicolas Capens5da8d8d2019-03-27 14:45:34 -04002100 LoopPhi phi(insn.word(2), type.sizeInComponents);
Ben Claytone747b3c2019-03-21 19:35:15 +00002101
2102 // Start with the Phi set to 0.
2103 for (uint32_t i = 0; i < type.sizeInComponents; i++)
2104 {
2105 phi.storage[i] = SIMD::Int(0);
2106 }
2107
2108 // For each Phi source:
2109 for (uint32_t w = 3; w < insn.wordCount(); w += 2)
2110 {
2111 auto varId = Object::ID(insn.word(w + 0));
2112 auto blockId = Block::ID(insn.word(w + 1));
Ben Clayton5d143aa2019-04-03 13:30:14 +01002113
2114 if (block.ins.count(blockId) == 0)
2115 {
2116 continue; // In is unreachable. Ignore.
2117 }
2118
Ben Clayton513ed1d2019-03-28 16:07:00 +00002119 if (existsPath(state->currentBlock, blockId, block.mergeBlock))
Ben Claytone747b3c2019-03-21 19:35:15 +00002120 {
2121 // This source is from a loop back-edge.
2122 ASSERT(phi.continueValue == 0 || phi.continueValue == varId);
2123 phi.continueValue = varId;
2124 }
2125 else
2126 {
2127 // This source is from a preceding block.
2128 for (uint32_t i = 0; i < type.sizeInComponents; i++)
2129 {
2130 auto in = GenericValue(this, state->routine, varId);
Ben Claytonfe3f0132019-03-26 11:10:16 +00002131 auto mask = GetActiveLaneMaskEdge(state, blockId, state->currentBlock);
Ben Claytone747b3c2019-03-21 19:35:15 +00002132 phi.storage[i] = phi.storage[i] | (in.Int(i) & mask);
2133 }
2134 }
2135 }
2136
2137 phis.push_back(phi);
2138 }
2139 }
2140
2141 // Create the loop basic blocks
2142 auto headerBasicBlock = Nucleus::createBasicBlock();
2143 auto mergeBasicBlock = Nucleus::createBasicBlock();
2144
2145 // Start emitting code inside the loop.
2146 Nucleus::createBr(headerBasicBlock);
2147 Nucleus::setInsertBlock(headerBasicBlock);
2148
2149 // Load the Phi values from storage.
2150 // This will load at the start of each loop.
2151 for (auto &phi : phis)
2152 {
2153 auto &type = getType(getObject(phi.phiId).type);
2154 auto &dst = state->routine->createIntermediate(phi.phiId, type.sizeInComponents);
2155 for (unsigned int i = 0u; i < type.sizeInComponents; i++)
2156 {
2157 dst.move(i, phi.storage[i]);
2158 }
2159 }
2160
2161 // Load the active lane mask.
2162 state->setActiveLaneMask(loopActiveLaneMask);
2163
2164 // Emit all the non-phi instructions in this loop header block.
2165 for (auto insn = block.begin(); insn != block.end(); insn++)
2166 {
2167 if (insn.opcode() != spv::OpPhi)
2168 {
2169 EmitInstruction(insn, state);
2170 }
2171 }
2172
Ben Clayton513ed1d2019-03-28 16:07:00 +00002173 // Emit all loop blocks, but don't emit the merge block yet.
2174 for (auto out : block.outs)
2175 {
2176 if (existsPath(out, blockId, block.mergeBlock))
2177 {
2178 EmitBlocks(out, state, block.mergeBlock);
2179 }
2180 }
2181
2182 // Rebuild the loopActiveLaneMask from the loop back edges.
Ben Claytone747b3c2019-03-21 19:35:15 +00002183 loopActiveLaneMask = SIMD::Int(0);
2184 for (auto in : block.ins)
2185 {
Ben Clayton513ed1d2019-03-28 16:07:00 +00002186 if (existsPath(blockId, in, block.mergeBlock))
Ben Claytone747b3c2019-03-21 19:35:15 +00002187 {
Ben Claytonfe3f0132019-03-26 11:10:16 +00002188 loopActiveLaneMask |= GetActiveLaneMaskEdge(state, in, blockId);
Ben Claytone747b3c2019-03-21 19:35:15 +00002189 }
2190 }
2191
2192 // Update loop phi values
2193 for (auto &phi : phis)
2194 {
2195 if (phi.continueValue != 0)
2196 {
2197 auto val = GenericValue(this, state->routine, phi.continueValue);
2198 auto &type = getType(getObject(phi.phiId).type);
2199 for (unsigned int i = 0u; i < type.sizeInComponents; i++)
2200 {
2201 phi.storage[i] = val.Int(i);
2202 }
2203 }
2204 }
2205
2206 // Loop body now done.
2207 // If any lanes are still active, jump back to the loop header,
2208 // otherwise jump to the merge block.
2209 Nucleus::createCondBr(AnyTrue(loopActiveLaneMask).value, headerBasicBlock, mergeBasicBlock);
2210
Ben Clayton513ed1d2019-03-28 16:07:00 +00002211 // Continue emitting from the merge block.
Ben Claytone747b3c2019-03-21 19:35:15 +00002212 Nucleus::setInsertBlock(mergeBasicBlock);
Ben Clayton513ed1d2019-03-28 16:07:00 +00002213 state->pending->emplace(block.mergeBlock);
Ben Claytone747b3c2019-03-21 19:35:15 +00002214 }
2215
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002216 SpirvShader::EmitResult SpirvShader::EmitInstruction(InsnIterator insn, EmitState *state) const
Ben Clayton9b156612019-03-13 19:48:31 +00002217 {
Nicolas Capens82eb22e2019-04-10 01:15:43 -04002218 auto opcode = insn.opcode();
2219
2220 switch (opcode)
Ben Clayton9b156612019-03-13 19:48:31 +00002221 {
2222 case spv::OpTypeVoid:
2223 case spv::OpTypeInt:
2224 case spv::OpTypeFloat:
2225 case spv::OpTypeBool:
2226 case spv::OpTypeVector:
2227 case spv::OpTypeArray:
2228 case spv::OpTypeRuntimeArray:
2229 case spv::OpTypeMatrix:
2230 case spv::OpTypeStruct:
2231 case spv::OpTypePointer:
2232 case spv::OpTypeFunction:
Nicolas Capens7d867272019-04-08 22:51:08 -04002233 case spv::OpTypeImage:
2234 case spv::OpTypeSampledImage:
Chris Forbesfa82c342019-04-26 16:42:38 -07002235 case spv::OpTypeSampler:
Ben Clayton9b156612019-03-13 19:48:31 +00002236 case spv::OpExecutionMode:
2237 case spv::OpMemoryModel:
2238 case spv::OpFunction:
2239 case spv::OpFunctionEnd:
2240 case spv::OpConstant:
2241 case spv::OpConstantNull:
2242 case spv::OpConstantTrue:
2243 case spv::OpConstantFalse:
2244 case spv::OpConstantComposite:
Chris Forbes0e712412019-03-18 19:31:16 -07002245 case spv::OpUndef:
Ben Clayton9b156612019-03-13 19:48:31 +00002246 case spv::OpExtension:
2247 case spv::OpCapability:
2248 case spv::OpEntryPoint:
2249 case spv::OpExtInstImport:
2250 case spv::OpDecorate:
2251 case spv::OpMemberDecorate:
2252 case spv::OpGroupDecorate:
2253 case spv::OpGroupMemberDecorate:
2254 case spv::OpDecorationGroup:
2255 case spv::OpName:
2256 case spv::OpMemberName:
2257 case spv::OpSource:
2258 case spv::OpSourceContinued:
2259 case spv::OpSourceExtension:
2260 case spv::OpLine:
2261 case spv::OpNoLine:
2262 case spv::OpModuleProcessed:
2263 case spv::OpString:
2264 // Nothing to do at emit time. These are either fully handled at analysis time,
2265 // or don't require any work at all.
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002266 return EmitResult::Continue;
Ben Clayton9b156612019-03-13 19:48:31 +00002267
2268 case spv::OpLabel:
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002269 return EmitResult::Continue;
Ben Clayton9b156612019-03-13 19:48:31 +00002270
2271 case spv::OpVariable:
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002272 return EmitVariable(insn, state);
Ben Clayton9b156612019-03-13 19:48:31 +00002273
2274 case spv::OpLoad:
Nicolas Capens5e8414e2019-03-19 16:22:35 -04002275 case spv::OpAtomicLoad:
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002276 return EmitLoad(insn, state);
Ben Clayton9b156612019-03-13 19:48:31 +00002277
2278 case spv::OpStore:
Nicolas Capens5e8414e2019-03-19 16:22:35 -04002279 case spv::OpAtomicStore:
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002280 return EmitStore(insn, state);
Ben Clayton9b156612019-03-13 19:48:31 +00002281
Chris Forbes17813932019-04-18 11:45:54 -07002282 case spv::OpAtomicIAdd:
Chris Forbes707ed992019-04-18 18:17:35 -07002283 case spv::OpAtomicISub:
Chris Forbes17813932019-04-18 11:45:54 -07002284 case spv::OpAtomicSMin:
2285 case spv::OpAtomicSMax:
2286 case spv::OpAtomicUMin:
2287 case spv::OpAtomicUMax:
2288 case spv::OpAtomicAnd:
2289 case spv::OpAtomicOr:
2290 case spv::OpAtomicXor:
Chris Forbes707ed992019-04-18 18:17:35 -07002291 case spv::OpAtomicIIncrement:
2292 case spv::OpAtomicIDecrement:
Chris Forbes17813932019-04-18 11:45:54 -07002293 case spv::OpAtomicExchange:
2294 return EmitAtomicOp(insn, state);
2295
Chris Forbesa16238d2019-04-18 16:31:54 -07002296 case spv::OpAtomicCompareExchange:
2297 return EmitAtomicCompareExchange(insn, state);
2298
Ben Clayton9b156612019-03-13 19:48:31 +00002299 case spv::OpAccessChain:
Chris Forbes10fd6242019-03-15 12:27:34 -07002300 case spv::OpInBoundsAccessChain:
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002301 return EmitAccessChain(insn, state);
Ben Clayton9b156612019-03-13 19:48:31 +00002302
2303 case spv::OpCompositeConstruct:
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002304 return EmitCompositeConstruct(insn, state);
Ben Clayton9b156612019-03-13 19:48:31 +00002305
2306 case spv::OpCompositeInsert:
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002307 return EmitCompositeInsert(insn, state);
Ben Clayton9b156612019-03-13 19:48:31 +00002308
2309 case spv::OpCompositeExtract:
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002310 return EmitCompositeExtract(insn, state);
Ben Clayton9b156612019-03-13 19:48:31 +00002311
2312 case spv::OpVectorShuffle:
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002313 return EmitVectorShuffle(insn, state);
Ben Clayton9b156612019-03-13 19:48:31 +00002314
Chris Forbesfaed9d32019-03-15 10:31:08 -07002315 case spv::OpVectorExtractDynamic:
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002316 return EmitVectorExtractDynamic(insn, state);
Chris Forbesfaed9d32019-03-15 10:31:08 -07002317
2318 case spv::OpVectorInsertDynamic:
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002319 return EmitVectorInsertDynamic(insn, state);
Chris Forbesfaed9d32019-03-15 10:31:08 -07002320
Ben Clayton9b156612019-03-13 19:48:31 +00002321 case spv::OpVectorTimesScalar:
Chris Forbes57e05b82019-03-28 09:16:20 +13002322 case spv::OpMatrixTimesScalar:
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002323 return EmitVectorTimesScalar(insn, state);
Ben Clayton9b156612019-03-13 19:48:31 +00002324
Chris Forbes06f4ed72019-03-28 09:53:20 +13002325 case spv::OpMatrixTimesVector:
2326 return EmitMatrixTimesVector(insn, state);
2327
Chris Forbesa563dd82019-03-28 10:32:55 +13002328 case spv::OpVectorTimesMatrix:
2329 return EmitVectorTimesMatrix(insn, state);
2330
Chris Forbes51562f12019-03-28 19:08:39 -07002331 case spv::OpMatrixTimesMatrix:
2332 return EmitMatrixTimesMatrix(insn, state);
2333
Ben Clayton3ee52992019-04-08 11:01:23 -04002334 case spv::OpOuterProduct:
2335 return EmitOuterProduct(insn, state);
2336
Ben Clayton620f7082019-04-08 11:12:08 -04002337 case spv::OpTranspose:
2338 return EmitTranspose(insn, state);
2339
Ben Clayton9b156612019-03-13 19:48:31 +00002340 case spv::OpNot:
Ben Claytonb5bfa502019-04-08 14:26:36 -04002341 case spv::OpBitFieldInsert:
Ben Claytond86db952019-04-08 13:43:11 -04002342 case spv::OpBitFieldSExtract:
2343 case spv::OpBitFieldUExtract:
Ben Claytond2a46432019-04-08 11:41:45 -04002344 case spv::OpBitReverse:
Ben Clayton1eb017d2019-04-08 11:32:09 -04002345 case spv::OpBitCount:
Ben Clayton9b156612019-03-13 19:48:31 +00002346 case spv::OpSNegate:
2347 case spv::OpFNegate:
2348 case spv::OpLogicalNot:
2349 case spv::OpConvertFToU:
2350 case spv::OpConvertFToS:
2351 case spv::OpConvertSToF:
2352 case spv::OpConvertUToF:
2353 case spv::OpBitcast:
2354 case spv::OpIsInf:
2355 case spv::OpIsNan:
Chris Forbesaff2dd02019-03-20 14:50:24 -07002356 case spv::OpDPdx:
2357 case spv::OpDPdxCoarse:
2358 case spv::OpDPdy:
2359 case spv::OpDPdyCoarse:
2360 case spv::OpFwidth:
2361 case spv::OpFwidthCoarse:
2362 case spv::OpDPdxFine:
2363 case spv::OpDPdyFine:
2364 case spv::OpFwidthFine:
Ben Clayton64da4ae2019-04-19 12:34:06 -04002365 case spv::OpQuantizeToF16:
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002366 return EmitUnaryOp(insn, state);
Ben Clayton9b156612019-03-13 19:48:31 +00002367
2368 case spv::OpIAdd:
2369 case spv::OpISub:
2370 case spv::OpIMul:
2371 case spv::OpSDiv:
2372 case spv::OpUDiv:
2373 case spv::OpFAdd:
2374 case spv::OpFSub:
2375 case spv::OpFMul:
2376 case spv::OpFDiv:
Chris Forbes0e4d6ff2019-03-15 13:43:36 -07002377 case spv::OpFMod:
Chris Forbes1a4c7122019-03-15 14:50:47 -07002378 case spv::OpFRem:
Ben Clayton9b156612019-03-13 19:48:31 +00002379 case spv::OpFOrdEqual:
2380 case spv::OpFUnordEqual:
2381 case spv::OpFOrdNotEqual:
2382 case spv::OpFUnordNotEqual:
2383 case spv::OpFOrdLessThan:
2384 case spv::OpFUnordLessThan:
2385 case spv::OpFOrdGreaterThan:
2386 case spv::OpFUnordGreaterThan:
2387 case spv::OpFOrdLessThanEqual:
2388 case spv::OpFUnordLessThanEqual:
2389 case spv::OpFOrdGreaterThanEqual:
2390 case spv::OpFUnordGreaterThanEqual:
2391 case spv::OpSMod:
Chris Forbes71673c82019-03-14 12:55:20 -07002392 case spv::OpSRem:
Ben Clayton9b156612019-03-13 19:48:31 +00002393 case spv::OpUMod:
2394 case spv::OpIEqual:
2395 case spv::OpINotEqual:
2396 case spv::OpUGreaterThan:
2397 case spv::OpSGreaterThan:
2398 case spv::OpUGreaterThanEqual:
2399 case spv::OpSGreaterThanEqual:
2400 case spv::OpULessThan:
2401 case spv::OpSLessThan:
2402 case spv::OpULessThanEqual:
2403 case spv::OpSLessThanEqual:
2404 case spv::OpShiftRightLogical:
2405 case spv::OpShiftRightArithmetic:
2406 case spv::OpShiftLeftLogical:
2407 case spv::OpBitwiseOr:
2408 case spv::OpBitwiseXor:
2409 case spv::OpBitwiseAnd:
2410 case spv::OpLogicalOr:
2411 case spv::OpLogicalAnd:
2412 case spv::OpLogicalEqual:
2413 case spv::OpLogicalNotEqual:
2414 case spv::OpUMulExtended:
2415 case spv::OpSMulExtended:
Chris Forbes3e6f60b2019-05-08 17:28:10 -07002416 case spv::OpIAddCarry:
2417 case spv::OpISubBorrow:
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002418 return EmitBinaryOp(insn, state);
Ben Clayton9b156612019-03-13 19:48:31 +00002419
2420 case spv::OpDot:
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002421 return EmitDot(insn, state);
Ben Clayton9b156612019-03-13 19:48:31 +00002422
2423 case spv::OpSelect:
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002424 return EmitSelect(insn, state);
Ben Clayton9b156612019-03-13 19:48:31 +00002425
2426 case spv::OpExtInst:
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002427 return EmitExtendedInstruction(insn, state);
Ben Clayton9b156612019-03-13 19:48:31 +00002428
2429 case spv::OpAny:
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002430 return EmitAny(insn, state);
Ben Clayton9b156612019-03-13 19:48:31 +00002431
2432 case spv::OpAll:
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002433 return EmitAll(insn, state);
Ben Clayton9b156612019-03-13 19:48:31 +00002434
Ben Claytone37ce612019-03-13 19:57:42 +00002435 case spv::OpBranch:
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002436 return EmitBranch(insn, state);
Ben Claytone37ce612019-03-13 19:57:42 +00002437
Ben Clayton9fd02e02019-03-21 18:47:15 +00002438 case spv::OpPhi:
2439 return EmitPhi(insn, state);
2440
2441 case spv::OpSelectionMerge:
Ben Claytone747b3c2019-03-21 19:35:15 +00002442 case spv::OpLoopMerge:
Ben Clayton9fd02e02019-03-21 18:47:15 +00002443 return EmitResult::Continue;
2444
2445 case spv::OpBranchConditional:
2446 return EmitBranchConditional(insn, state);
2447
Ben Clayton213a8ce2019-03-21 18:57:23 +00002448 case spv::OpSwitch:
2449 return EmitSwitch(insn, state);
2450
Ben Clayton9fd02e02019-03-21 18:47:15 +00002451 case spv::OpUnreachable:
2452 return EmitUnreachable(insn, state);
2453
2454 case spv::OpReturn:
2455 return EmitReturn(insn, state);
2456
Chris Forbes97e95892019-04-02 13:37:37 +13002457 case spv::OpKill:
2458 return EmitKill(insn, state);
2459
Nicolas Capens7d867272019-04-08 22:51:08 -04002460 case spv::OpImageSampleImplicitLod:
Nicolas Capens5b09dd12019-04-30 01:05:28 -04002461 return EmitImageSampleImplicitLod(None, insn, state);
Chris Forbesb0d00ea2019-04-17 20:24:20 -07002462
Nicolas Capens125dba02019-04-24 02:03:22 -04002463 case spv::OpImageSampleExplicitLod:
Nicolas Capens5b09dd12019-04-30 01:05:28 -04002464 return EmitImageSampleExplicitLod(None, insn, state);
2465
2466 case spv::OpImageSampleDrefImplicitLod:
2467 return EmitImageSampleImplicitLod(Dref, insn, state);
2468
2469 case spv::OpImageSampleDrefExplicitLod:
2470 return EmitImageSampleExplicitLod(Dref, insn, state);
2471
2472 case spv::OpImageSampleProjImplicitLod:
2473 return EmitImageSampleImplicitLod(Proj, insn, state);
2474
2475 case spv::OpImageSampleProjExplicitLod:
2476 return EmitImageSampleExplicitLod(Proj, insn, state);
2477
2478 case spv::OpImageSampleProjDrefImplicitLod:
2479 return EmitImageSampleImplicitLod(ProjDref, insn, state);
2480
2481 case spv::OpImageSampleProjDrefExplicitLod:
2482 return EmitImageSampleExplicitLod(ProjDref, insn, state);
Nicolas Capens125dba02019-04-24 02:03:22 -04002483
Chris Forbescd631592019-04-27 10:37:18 -07002484 case spv::OpImageFetch:
2485 return EmitImageFetch(insn, state);
2486
Chris Forbesb0d00ea2019-04-17 20:24:20 -07002487 case spv::OpImageQuerySize:
2488 return EmitImageQuerySize(insn, state);
2489
Ben Clayton0264d8e2019-05-08 15:39:40 +01002490 case spv::OpImageQuerySizeLod:
2491 return EmitImageQuerySizeLod(insn, state);
2492
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07002493 case spv::OpImageRead:
2494 return EmitImageRead(insn, state);
2495
Chris Forbes179f0142019-04-17 20:24:44 -07002496 case spv::OpImageWrite:
2497 return EmitImageWrite(insn, state);
2498
Chris Forbesb51f2c12019-04-18 11:01:30 -07002499 case spv::OpImageTexelPointer:
2500 return EmitImageTexelPointer(insn, state);
2501
Chris Forbesfa82c342019-04-26 16:42:38 -07002502 case spv::OpSampledImage:
2503 case spv::OpImage:
2504 return EmitSampledImageCombineOrSplit(insn, state);
2505
Ben Clayton78abf372019-05-09 15:11:58 +01002506 case spv::OpCopyObject:
2507 return EmitCopyObject(insn, state);
2508
Ben Claytonb5a45462019-04-30 19:21:29 +01002509 case spv::OpCopyMemory:
2510 return EmitCopyMemory(insn, state);
2511
Ben Claytonecfeede2019-05-08 08:51:01 +01002512 case spv::OpControlBarrier:
2513 return EmitControlBarrier(insn, state);
2514
Ben Claytonb16c5862019-05-08 14:01:38 +01002515 case spv::OpMemoryBarrier:
2516 return EmitMemoryBarrier(insn, state);
2517
Ben Clayton32d47972019-04-19 17:08:15 -04002518 case spv::OpGroupNonUniformElect:
2519 return EmitGroupNonUniform(insn, state);
2520
Ben Claytone4605da2019-05-09 16:24:01 +01002521 case spv::OpArrayLength:
2522 return EmitArrayLength(insn, state);
2523
Ben Clayton9b156612019-03-13 19:48:31 +00002524 default:
Ben Clayton92797c22019-04-25 10:44:03 +01002525 UNREACHABLE("%s", OpcodeName(opcode).c_str());
Ben Clayton9b156612019-03-13 19:48:31 +00002526 break;
Chris Forbesd5aed492019-02-02 15:18:52 -08002527 }
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002528
2529 return EmitResult::Continue;
Chris Forbesd5aed492019-02-02 15:18:52 -08002530 }
Chris Forbesc61271e2019-02-19 17:01:28 -08002531
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002532 SpirvShader::EmitResult SpirvShader::EmitVariable(InsnIterator insn, EmitState *state) const
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002533 {
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002534 auto routine = state->routine;
Ben Claytonaf973b62019-03-13 18:19:20 +00002535 Object::ID resultId = insn.word(2);
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002536 auto &object = getObject(resultId);
2537 auto &objectTy = getType(object.type);
Ben Clayton484e08e2019-04-05 12:11:39 +01002538
Ben Claytonefec1b92019-03-05 17:38:16 +00002539 switch (objectTy.storageClass)
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002540 {
Ben Clayton484e08e2019-04-05 12:11:39 +01002541 case spv::StorageClassOutput:
2542 case spv::StorageClassPrivate:
2543 case spv::StorageClassFunction:
2544 {
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04002545 ASSERT(objectTy.opcode() == spv::OpTypePointer);
Ben Clayton5f7e9112019-04-16 11:03:40 -04002546 auto base = &routine->getVariable(resultId)[0];
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04002547 auto elementTy = getType(objectTy.element);
2548 auto size = elementTy.sizeInComponents * sizeof(float) * SIMD::Width;
2549 routine->createPointer(resultId, SIMD::Pointer(base, size));
Ben Clayton484e08e2019-04-05 12:11:39 +01002550 break;
2551 }
Ben Claytonecd38482019-04-19 17:11:08 -04002552 case spv::StorageClassWorkgroup:
2553 {
2554 ASSERT(objectTy.opcode() == spv::OpTypePointer);
2555 auto base = &routine->workgroupMemory[0];
2556 auto size = workgroupMemory.size();
2557 routine->createPointer(resultId, SIMD::Pointer(base, size, workgroupMemory.offsetOf(resultId)));
2558 break;
2559 }
Ben Claytonefec1b92019-03-05 17:38:16 +00002560 case spv::StorageClassInput:
2561 {
2562 if (object.kind == Object::Kind::InterfaceVariable)
2563 {
Ben Clayton47747612019-04-04 16:27:35 +01002564 auto &dst = routine->getVariable(resultId);
Ben Claytonefec1b92019-03-05 17:38:16 +00002565 int offset = 0;
2566 VisitInterface(resultId,
2567 [&](Decorations const &d, AttribType type) {
2568 auto scalarSlot = d.Location << 2 | d.Component;
2569 dst[offset++] = routine->inputs[scalarSlot];
2570 });
2571 }
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04002572 ASSERT(objectTy.opcode() == spv::OpTypePointer);
Ben Clayton5f7e9112019-04-16 11:03:40 -04002573 auto base = &routine->getVariable(resultId)[0];
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04002574 auto elementTy = getType(objectTy.element);
2575 auto size = elementTy.sizeInComponents * sizeof(float) * SIMD::Width;
2576 routine->createPointer(resultId, SIMD::Pointer(base, size));
Ben Claytonefec1b92019-03-05 17:38:16 +00002577 break;
2578 }
Nicolas Capens7d867272019-04-08 22:51:08 -04002579 case spv::StorageClassUniformConstant:
2580 {
2581 const auto &d = descriptorDecorations.at(resultId);
2582 ASSERT(d.DescriptorSet >= 0);
2583 ASSERT(d.Binding >= 0);
2584
2585 uint32_t arrayIndex = 0; // TODO(b/129523279)
2586 auto setLayout = routine->pipelineLayout->getDescriptorSetLayout(d.DescriptorSet);
Ben Clayton8c56e8d2019-04-25 08:24:01 +01002587 if (setLayout->hasBinding(d.Binding))
2588 {
2589 size_t bindingOffset = setLayout->getBindingOffset(d.Binding, arrayIndex);
2590 Pointer<Byte> set = routine->descriptorSets[d.DescriptorSet]; // DescriptorSet*
2591 Pointer<Byte> binding = Pointer<Byte>(set + bindingOffset); // vk::SampledImageDescriptor*
2592 auto size = 0; // Not required as this pointer is not directly used by SIMD::Read or SIMD::Write.
2593 routine->createPointer(resultId, SIMD::Pointer(binding, size));
2594 }
2595 else
2596 {
2597 // TODO: Error if the variable with the non-existant binding is
2598 // used? Or perhaps strip these unused variable declarations as
2599 // a preprocess on the SPIR-V?
2600 }
Nicolas Capens7d867272019-04-08 22:51:08 -04002601 break;
2602 }
Ben Claytonefec1b92019-03-05 17:38:16 +00002603 case spv::StorageClassUniform:
2604 case spv::StorageClassStorageBuffer:
2605 {
Nicolas Capens82eb22e2019-04-10 01:15:43 -04002606 const auto &d = descriptorDecorations.at(resultId);
2607 ASSERT(d.DescriptorSet >= 0 && d.DescriptorSet < vk::MAX_BOUND_DESCRIPTOR_SETS);
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04002608 auto size = 0; // Not required as this pointer is not directly used by SIMD::Read or SIMD::Write.
2609 routine->createPointer(resultId, SIMD::Pointer(routine->descriptorSets[d.DescriptorSet], size));
Ben Claytonefec1b92019-03-05 17:38:16 +00002610 break;
2611 }
Chris Forbesa30de542019-03-18 18:51:55 -07002612 case spv::StorageClassPushConstant:
2613 {
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04002614 routine->createPointer(resultId, SIMD::Pointer(routine->pushConstants, vk::MAX_PUSH_CONSTANT_SIZE));
Chris Forbesa30de542019-03-18 18:51:55 -07002615 break;
2616 }
Ben Claytonefec1b92019-03-05 17:38:16 +00002617 default:
Ben Clayton92797c22019-04-25 10:44:03 +01002618 UNREACHABLE("Storage class %d", objectTy.storageClass);
Ben Claytonefec1b92019-03-05 17:38:16 +00002619 break;
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002620 }
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002621
Ben Clayton05f27a32019-04-30 16:15:25 +01002622 if (insn.wordCount() > 4)
2623 {
2624 Object::ID initializerId = insn.word(4);
2625 if (getObject(initializerId).kind != Object::Kind::Constant)
2626 {
2627 UNIMPLEMENTED("Non-constant initializers not yet implemented");
2628 }
2629 switch (objectTy.storageClass)
2630 {
2631 case spv::StorageClassOutput:
2632 case spv::StorageClassPrivate:
2633 case spv::StorageClassFunction:
2634 {
2635 bool interleavedByLane = IsStorageInterleavedByLane(objectTy.storageClass);
2636 auto ptr = routine->getPointer(resultId);
2637 GenericValue initialValue(this, routine, initializerId);
2638 VisitMemoryObject(resultId, [&](uint32_t i, uint32_t offset)
2639 {
2640 auto p = ptr + offset;
2641 if (interleavedByLane) { p = interleaveByLane(p); }
2642 SIMD::Store(p, initialValue.Float(i), state->activeLaneMask());
2643 });
2644 break;
2645 }
2646 default:
2647 ASSERT_MSG(initializerId == 0, "Vulkan does not permit variables of storage class %d to have initializers", int(objectTy.storageClass));
2648 }
2649 }
2650
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002651 return EmitResult::Continue;
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002652 }
2653
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002654 SpirvShader::EmitResult SpirvShader::EmitLoad(InsnIterator insn, EmitState *state) const
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002655 {
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002656 auto routine = state->routine;
Nicolas Capens86509d92019-03-21 13:23:50 -04002657 bool atomic = (insn.opcode() == spv::OpAtomicLoad);
Nicolas Capensfabdec52019-03-21 17:04:05 -04002658 Object::ID resultId = insn.word(2);
Ben Claytonaf973b62019-03-13 18:19:20 +00002659 Object::ID pointerId = insn.word(3);
Nicolas Capensfabdec52019-03-21 17:04:05 -04002660 auto &result = getObject(resultId);
2661 auto &resultTy = getType(result.type);
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002662 auto &pointer = getObject(pointerId);
Ben Clayton484e08e2019-04-05 12:11:39 +01002663 auto &pointerTy = getType(pointer.type);
Nicolas Capens86509d92019-03-21 13:23:50 -04002664 std::memory_order memoryOrder = std::memory_order_relaxed;
2665
Nicolas Capens82eb22e2019-04-10 01:15:43 -04002666 ASSERT(getType(pointer.type).element == result.type);
2667 ASSERT(Type::ID(insn.word(1)) == result.type);
2668 ASSERT(!atomic || getType(getType(pointer.type).element).opcode() == spv::OpTypeInt); // Vulkan 1.1: "Atomic instructions must declare a scalar 32-bit integer type, for the value pointed to by Pointer."
2669
Chris Forbes0b092cd2019-04-19 09:02:14 -07002670 if(pointerTy.storageClass == spv::StorageClassUniformConstant)
Nicolas Capens7d867272019-04-08 22:51:08 -04002671 {
2672 // Just propagate the pointer.
Nicolas Capens7d867272019-04-08 22:51:08 -04002673 auto &ptr = routine->getPointer(pointerId);
2674 routine->createPointer(resultId, ptr);
Nicolas Capens7d867272019-04-08 22:51:08 -04002675 return EmitResult::Continue;
2676 }
2677
Nicolas Capens86509d92019-03-21 13:23:50 -04002678 if(atomic)
2679 {
2680 Object::ID semanticsId = insn.word(5);
2681 auto memorySemantics = static_cast<spv::MemorySemanticsMask>(getObject(semanticsId).constantValue[0]);
2682 memoryOrder = MemoryOrder(memorySemantics);
2683 }
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002684
Ben Clayton484e08e2019-04-05 12:11:39 +01002685 if (pointerTy.storageClass == spv::StorageClassImage)
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002686 {
Ben Claytonefec1b92019-03-05 17:38:16 +00002687 UNIMPLEMENTED("StorageClassImage load not yet implemented");
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002688 }
2689
Ben Clayton3d497382019-04-08 16:16:12 -04002690 auto ptr = GetPointerToData(pointerId, 0, routine);
Ben Clayton831db962019-02-27 14:57:18 +00002691
Ben Clayton484e08e2019-04-05 12:11:39 +01002692 bool interleavedByLane = IsStorageInterleavedByLane(pointerTy.storageClass);
Ben Clayton49d81582019-03-12 20:05:04 +00002693
Nicolas Capensfabdec52019-03-21 17:04:05 -04002694 auto &dst = routine->createIntermediate(resultId, resultTy.sizeInComponents);
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04002695
2696 VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t offset)
Ben Clayton831db962019-02-27 14:57:18 +00002697 {
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04002698 auto p = ptr + offset;
2699 if (interleavedByLane) { p = interleaveByLane(p); }
2700 dst.move(i, SIMD::Load<SIMD::Float>(p, state->activeLaneMask(), atomic, memoryOrder));
2701 });
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002702
2703 return EmitResult::Continue;
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002704 }
2705
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002706 SpirvShader::EmitResult SpirvShader::EmitStore(InsnIterator insn, EmitState *state) const
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002707 {
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002708 auto routine = state->routine;
Nicolas Capens5e8414e2019-03-19 16:22:35 -04002709 bool atomic = (insn.opcode() == spv::OpAtomicStore);
Ben Claytonaf973b62019-03-13 18:19:20 +00002710 Object::ID pointerId = insn.word(1);
Nicolas Capens5e8414e2019-03-19 16:22:35 -04002711 Object::ID objectId = insn.word(atomic ? 4 : 2);
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002712 auto &object = getObject(objectId);
2713 auto &pointer = getObject(pointerId);
2714 auto &pointerTy = getType(pointer.type);
2715 auto &elementTy = getType(pointerTy.element);
Nicolas Capens86509d92019-03-21 13:23:50 -04002716 std::memory_order memoryOrder = std::memory_order_relaxed;
2717
2718 if(atomic)
2719 {
2720 Object::ID semanticsId = insn.word(3);
2721 auto memorySemantics = static_cast<spv::MemorySemanticsMask>(getObject(semanticsId).constantValue[0]);
2722 memoryOrder = MemoryOrder(memorySemantics);
2723 }
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002724
Nicolas Capens5e8414e2019-03-19 16:22:35 -04002725 ASSERT(!atomic || elementTy.opcode() == spv::OpTypeInt); // Vulkan 1.1: "Atomic instructions must declare a scalar 32-bit integer type, for the value pointed to by Pointer."
2726
Ben Clayton484e08e2019-04-05 12:11:39 +01002727 if (pointerTy.storageClass == spv::StorageClassImage)
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002728 {
Ben Claytonefec1b92019-03-05 17:38:16 +00002729 UNIMPLEMENTED("StorageClassImage store not yet implemented");
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002730 }
2731
Ben Clayton3d497382019-04-08 16:16:12 -04002732 auto ptr = GetPointerToData(pointerId, 0, routine);
Ben Clayton484e08e2019-04-05 12:11:39 +01002733 bool interleavedByLane = IsStorageInterleavedByLane(pointerTy.storageClass);
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002734
2735 if (object.kind == Object::Kind::Constant)
2736 {
Ben Clayton49d81582019-03-12 20:05:04 +00002737 // Constant source data.
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002738 auto src = reinterpret_cast<float *>(object.constantValue.get());
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04002739 VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t offset)
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002740 {
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04002741 auto p = ptr + offset;
2742 if (interleavedByLane) { p = interleaveByLane(p); }
2743 SIMD::Store(p, SIMD::Float(src[i]), state->activeLaneMask(), atomic, memoryOrder);
2744 });
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002745 }
2746 else
2747 {
Ben Clayton49d81582019-03-12 20:05:04 +00002748 // Intermediate source data.
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002749 auto &src = routine->getIntermediate(objectId);
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04002750 VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t offset)
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002751 {
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04002752 auto p = ptr + offset;
2753 if (interleavedByLane) { p = interleaveByLane(p); }
2754 SIMD::Store(p, src.Float(i), state->activeLaneMask(), atomic, memoryOrder);
2755 });
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002756 }
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002757
2758 return EmitResult::Continue;
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002759 }
2760
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002761 SpirvShader::EmitResult SpirvShader::EmitAccessChain(InsnIterator insn, EmitState *state) const
Nicolas Capensfabdec52019-03-21 17:04:05 -04002762 {
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002763 auto routine = state->routine;
Nicolas Capensfabdec52019-03-21 17:04:05 -04002764 Type::ID typeId = insn.word(1);
2765 Object::ID resultId = insn.word(2);
2766 Object::ID baseId = insn.word(3);
2767 uint32_t numIndexes = insn.wordCount() - 4;
2768 const uint32_t *indexes = insn.wordPointer(4);
2769 auto &type = getType(typeId);
2770 ASSERT(type.sizeInComponents == 1);
Ben Clayton1d514f32019-04-19 16:11:18 -04002771 ASSERT(getObject(resultId).kind == Object::Kind::Pointer);
Nicolas Capensfabdec52019-03-21 17:04:05 -04002772
2773 if(type.storageClass == spv::StorageClassPushConstant ||
2774 type.storageClass == spv::StorageClassUniform ||
2775 type.storageClass == spv::StorageClassStorageBuffer)
2776 {
Ben Clayton3d497382019-04-08 16:16:12 -04002777 auto ptr = WalkExplicitLayoutAccessChain(baseId, numIndexes, indexes, routine);
Ben Clayton5f7e9112019-04-16 11:03:40 -04002778 routine->createPointer(resultId, ptr);
Nicolas Capensfabdec52019-03-21 17:04:05 -04002779 }
2780 else
2781 {
Ben Clayton5f7e9112019-04-16 11:03:40 -04002782 auto ptr = WalkAccessChain(baseId, numIndexes, indexes, routine);
2783 routine->createPointer(resultId, ptr);
Nicolas Capensfabdec52019-03-21 17:04:05 -04002784 }
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002785
2786 return EmitResult::Continue;
Nicolas Capensfabdec52019-03-21 17:04:05 -04002787 }
2788
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002789 SpirvShader::EmitResult SpirvShader::EmitCompositeConstruct(InsnIterator insn, EmitState *state) const
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002790 {
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002791 auto routine = state->routine;
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002792 auto &type = getType(insn.word(1));
2793 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2794 auto offset = 0u;
2795
2796 for (auto i = 0u; i < insn.wordCount() - 3; i++)
2797 {
Ben Claytonaf973b62019-03-13 18:19:20 +00002798 Object::ID srcObjectId = insn.word(3u + i);
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002799 auto & srcObject = getObject(srcObjectId);
2800 auto & srcObjectTy = getType(srcObject.type);
2801 GenericValue srcObjectAccess(this, routine, srcObjectId);
2802
2803 for (auto j = 0u; j < srcObjectTy.sizeInComponents; j++)
Ben Clayton9b62c5e2019-03-08 09:32:34 +00002804 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04002805 dst.move(offset++, srcObjectAccess.Float(j));
Ben Clayton9b62c5e2019-03-08 09:32:34 +00002806 }
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002807 }
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002808
2809 return EmitResult::Continue;
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002810 }
2811
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002812 SpirvShader::EmitResult SpirvShader::EmitCompositeInsert(InsnIterator insn, EmitState *state) const
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002813 {
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002814 auto routine = state->routine;
Ben Claytonaf973b62019-03-13 18:19:20 +00002815 Type::ID resultTypeId = insn.word(1);
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002816 auto &type = getType(resultTypeId);
2817 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2818 auto &newPartObject = getObject(insn.word(3));
2819 auto &newPartObjectTy = getType(newPartObject.type);
2820 auto firstNewComponent = WalkLiteralAccessChain(resultTypeId, insn.wordCount() - 5, insn.wordPointer(5));
2821
2822 GenericValue srcObjectAccess(this, routine, insn.word(4));
2823 GenericValue newPartObjectAccess(this, routine, insn.word(3));
2824
2825 // old components before
2826 for (auto i = 0u; i < firstNewComponent; i++)
2827 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04002828 dst.move(i, srcObjectAccess.Float(i));
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002829 }
2830 // new part
2831 for (auto i = 0u; i < newPartObjectTy.sizeInComponents; i++)
2832 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04002833 dst.move(firstNewComponent + i, newPartObjectAccess.Float(i));
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002834 }
2835 // old components after
2836 for (auto i = firstNewComponent + newPartObjectTy.sizeInComponents; i < type.sizeInComponents; i++)
2837 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04002838 dst.move(i, srcObjectAccess.Float(i));
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002839 }
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002840
2841 return EmitResult::Continue;
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002842 }
2843
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002844 SpirvShader::EmitResult SpirvShader::EmitCompositeExtract(InsnIterator insn, EmitState *state) const
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002845 {
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002846 auto routine = state->routine;
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002847 auto &type = getType(insn.word(1));
2848 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2849 auto &compositeObject = getObject(insn.word(3));
Ben Claytonaf973b62019-03-13 18:19:20 +00002850 Type::ID compositeTypeId = compositeObject.definition.word(1);
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002851 auto firstComponent = WalkLiteralAccessChain(compositeTypeId, insn.wordCount() - 4, insn.wordPointer(4));
2852
2853 GenericValue compositeObjectAccess(this, routine, insn.word(3));
2854 for (auto i = 0u; i < type.sizeInComponents; i++)
2855 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04002856 dst.move(i, compositeObjectAccess.Float(firstComponent + i));
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002857 }
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002858
2859 return EmitResult::Continue;
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002860 }
2861
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002862 SpirvShader::EmitResult SpirvShader::EmitVectorShuffle(InsnIterator insn, EmitState *state) const
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002863 {
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002864 auto routine = state->routine;
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002865 auto &type = getType(insn.word(1));
2866 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2867
Chris Forbes13eba6c2019-03-08 10:41:05 -08002868 // Note: number of components in result type, first half type, and second
2869 // half type are all independent.
2870 auto &firstHalfType = getType(getObject(insn.word(3)).type);
2871
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002872 GenericValue firstHalfAccess(this, routine, insn.word(3));
2873 GenericValue secondHalfAccess(this, routine, insn.word(4));
2874
2875 for (auto i = 0u; i < type.sizeInComponents; i++)
2876 {
2877 auto selector = insn.word(5 + i);
2878 if (selector == static_cast<uint32_t>(-1))
2879 {
2880 // Undefined value. Until we decide to do real undef values, zero is as good
2881 // a value as any
Nicolas Capens80c796b2019-03-19 21:38:44 -04002882 dst.move(i, RValue<SIMD::Float>(0.0f));
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002883 }
Chris Forbes13eba6c2019-03-08 10:41:05 -08002884 else if (selector < firstHalfType.sizeInComponents)
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002885 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04002886 dst.move(i, firstHalfAccess.Float(selector));
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002887 }
2888 else
2889 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04002890 dst.move(i, secondHalfAccess.Float(selector - firstHalfType.sizeInComponents));
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002891 }
2892 }
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002893
2894 return EmitResult::Continue;
Ben Claytondfc0f3b2019-02-26 12:19:48 +00002895 }
2896
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002897 SpirvShader::EmitResult SpirvShader::EmitVectorExtractDynamic(InsnIterator insn, EmitState *state) const
Chris Forbesfaed9d32019-03-15 10:31:08 -07002898 {
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002899 auto routine = state->routine;
Chris Forbesfaed9d32019-03-15 10:31:08 -07002900 auto &type = getType(insn.word(1));
2901 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2902 auto &srcType = getType(getObject(insn.word(3)).type);
2903
2904 GenericValue src(this, routine, insn.word(3));
2905 GenericValue index(this, routine, insn.word(4));
2906
2907 SIMD::UInt v = SIMD::UInt(0);
2908
2909 for (auto i = 0u; i < srcType.sizeInComponents; i++)
2910 {
2911 v |= CmpEQ(index.UInt(0), SIMD::UInt(i)) & src.UInt(i);
2912 }
2913
Nicolas Capens80c796b2019-03-19 21:38:44 -04002914 dst.move(0, v);
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002915 return EmitResult::Continue;
Chris Forbesfaed9d32019-03-15 10:31:08 -07002916 }
2917
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002918 SpirvShader::EmitResult SpirvShader::EmitVectorInsertDynamic(InsnIterator insn, EmitState *state) const
Chris Forbesfaed9d32019-03-15 10:31:08 -07002919 {
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002920 auto routine = state->routine;
Chris Forbesfaed9d32019-03-15 10:31:08 -07002921 auto &type = getType(insn.word(1));
2922 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2923
2924 GenericValue src(this, routine, insn.word(3));
2925 GenericValue component(this, routine, insn.word(4));
2926 GenericValue index(this, routine, insn.word(5));
2927
2928 for (auto i = 0u; i < type.sizeInComponents; i++)
2929 {
2930 SIMD::UInt mask = CmpEQ(SIMD::UInt(i), index.UInt(0));
Nicolas Capens80c796b2019-03-19 21:38:44 -04002931 dst.move(i, (src.UInt(i) & ~mask) | (component.UInt(0) & mask));
Chris Forbesfaed9d32019-03-15 10:31:08 -07002932 }
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002933 return EmitResult::Continue;
Chris Forbesfaed9d32019-03-15 10:31:08 -07002934 }
2935
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002936 SpirvShader::EmitResult SpirvShader::EmitVectorTimesScalar(InsnIterator insn, EmitState *state) const
Chris Forbes856ebf82019-03-08 15:30:18 -08002937 {
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002938 auto routine = state->routine;
Chris Forbes856ebf82019-03-08 15:30:18 -08002939 auto &type = getType(insn.word(1));
2940 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
Ben Clayton9b62c5e2019-03-08 09:32:34 +00002941 auto lhs = GenericValue(this, routine, insn.word(3));
2942 auto rhs = GenericValue(this, routine, insn.word(4));
Chris Forbes856ebf82019-03-08 15:30:18 -08002943
2944 for (auto i = 0u; i < type.sizeInComponents; i++)
2945 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04002946 dst.move(i, lhs.Float(i) * rhs.Float(0));
Chris Forbes856ebf82019-03-08 15:30:18 -08002947 }
Ben Claytonc0cf68b2019-03-21 17:46:08 +00002948
2949 return EmitResult::Continue;
Chris Forbes856ebf82019-03-08 15:30:18 -08002950 }
2951
Chris Forbes06f4ed72019-03-28 09:53:20 +13002952 SpirvShader::EmitResult SpirvShader::EmitMatrixTimesVector(InsnIterator insn, EmitState *state) const
2953 {
2954 auto routine = state->routine;
2955 auto &type = getType(insn.word(1));
2956 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2957 auto lhs = GenericValue(this, routine, insn.word(3));
2958 auto rhs = GenericValue(this, routine, insn.word(4));
Ben Clayton16ab9e92019-04-08 10:57:35 -04002959 auto rhsType = getType(rhs.type);
Chris Forbes06f4ed72019-03-28 09:53:20 +13002960
2961 for (auto i = 0u; i < type.sizeInComponents; i++)
2962 {
2963 SIMD::Float v = lhs.Float(i) * rhs.Float(0);
2964 for (auto j = 1u; j < rhsType.sizeInComponents; j++)
2965 {
2966 v += lhs.Float(i + type.sizeInComponents * j) * rhs.Float(j);
2967 }
2968 dst.move(i, v);
2969 }
2970
2971 return EmitResult::Continue;
2972 }
2973
Chris Forbesa563dd82019-03-28 10:32:55 +13002974 SpirvShader::EmitResult SpirvShader::EmitVectorTimesMatrix(InsnIterator insn, EmitState *state) const
2975 {
2976 auto routine = state->routine;
2977 auto &type = getType(insn.word(1));
2978 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2979 auto lhs = GenericValue(this, routine, insn.word(3));
2980 auto rhs = GenericValue(this, routine, insn.word(4));
Ben Clayton16ab9e92019-04-08 10:57:35 -04002981 auto lhsType = getType(lhs.type);
Chris Forbesa563dd82019-03-28 10:32:55 +13002982
2983 for (auto i = 0u; i < type.sizeInComponents; i++)
2984 {
2985 SIMD::Float v = lhs.Float(0) * rhs.Float(i * lhsType.sizeInComponents);
2986 for (auto j = 1u; j < lhsType.sizeInComponents; j++)
2987 {
2988 v += lhs.Float(j) * rhs.Float(i * lhsType.sizeInComponents + j);
2989 }
2990 dst.move(i, v);
2991 }
2992
2993 return EmitResult::Continue;
2994 }
2995
Chris Forbes51562f12019-03-28 19:08:39 -07002996 SpirvShader::EmitResult SpirvShader::EmitMatrixTimesMatrix(InsnIterator insn, EmitState *state) const
2997 {
2998 auto routine = state->routine;
2999 auto &type = getType(insn.word(1));
3000 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
3001 auto lhs = GenericValue(this, routine, insn.word(3));
3002 auto rhs = GenericValue(this, routine, insn.word(4));
3003
3004 auto numColumns = type.definition.word(3);
3005 auto numRows = getType(type.definition.word(2)).definition.word(3);
3006 auto numAdds = getType(getObject(insn.word(3)).type).definition.word(3);
3007
3008 for (auto row = 0u; row < numRows; row++)
3009 {
3010 for (auto col = 0u; col < numColumns; col++)
3011 {
3012 SIMD::Float v = SIMD::Float(0);
3013 for (auto i = 0u; i < numAdds; i++)
3014 {
3015 v += lhs.Float(i * numRows + row) * rhs.Float(col * numAdds + i);
3016 }
3017 dst.move(numRows * col + row, v);
3018 }
3019 }
3020
3021 return EmitResult::Continue;
3022 }
3023
Ben Clayton3ee52992019-04-08 11:01:23 -04003024 SpirvShader::EmitResult SpirvShader::EmitOuterProduct(InsnIterator insn, EmitState *state) const
3025 {
3026 auto routine = state->routine;
3027 auto &type = getType(insn.word(1));
3028 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
3029 auto lhs = GenericValue(this, routine, insn.word(3));
3030 auto rhs = GenericValue(this, routine, insn.word(4));
3031 auto &lhsType = getType(lhs.type);
3032 auto &rhsType = getType(rhs.type);
3033
3034 ASSERT(type.definition.opcode() == spv::OpTypeMatrix);
3035 ASSERT(lhsType.definition.opcode() == spv::OpTypeVector);
3036 ASSERT(rhsType.definition.opcode() == spv::OpTypeVector);
3037 ASSERT(getType(lhsType.element).opcode() == spv::OpTypeFloat);
3038 ASSERT(getType(rhsType.element).opcode() == spv::OpTypeFloat);
3039
3040 auto numRows = lhsType.definition.word(3);
3041 auto numCols = rhsType.definition.word(3);
3042
3043 for (auto col = 0u; col < numCols; col++)
3044 {
3045 for (auto row = 0u; row < numRows; row++)
3046 {
3047 dst.move(col * numRows + row, lhs.Float(row) * rhs.Float(col));
3048 }
3049 }
3050
3051 return EmitResult::Continue;
3052 }
3053
Ben Clayton620f7082019-04-08 11:12:08 -04003054 SpirvShader::EmitResult SpirvShader::EmitTranspose(InsnIterator insn, EmitState *state) const
3055 {
3056 auto routine = state->routine;
3057 auto &type = getType(insn.word(1));
3058 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
3059 auto mat = GenericValue(this, routine, insn.word(3));
3060
3061 auto numCols = type.definition.word(3);
3062 auto numRows = getType(type.definition.word(2)).sizeInComponents;
3063
3064 for (auto col = 0u; col < numCols; col++)
3065 {
3066 for (auto row = 0u; row < numRows; row++)
3067 {
3068 dst.move(col * numRows + row, mat.Float(row * numCols + col));
3069 }
3070 }
3071
3072 return EmitResult::Continue;
3073 }
3074
Ben Claytonc0cf68b2019-03-21 17:46:08 +00003075 SpirvShader::EmitResult SpirvShader::EmitUnaryOp(InsnIterator insn, EmitState *state) const
Ben Claytondd1e37e2019-02-28 19:59:15 +00003076 {
Ben Claytonc0cf68b2019-03-21 17:46:08 +00003077 auto routine = state->routine;
Ben Claytondd1e37e2019-02-28 19:59:15 +00003078 auto &type = getType(insn.word(1));
3079 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
3080 auto src = GenericValue(this, routine, insn.word(3));
3081
3082 for (auto i = 0u; i < type.sizeInComponents; i++)
3083 {
Ben Claytondd1e37e2019-02-28 19:59:15 +00003084 switch (insn.opcode())
3085 {
3086 case spv::OpNot:
3087 case spv::OpLogicalNot: // logical not == bitwise not due to all-bits boolean representation
Nicolas Capens80c796b2019-03-19 21:38:44 -04003088 dst.move(i, ~src.UInt(i));
Ben Claytondd1e37e2019-02-28 19:59:15 +00003089 break;
Ben Claytonb5bfa502019-04-08 14:26:36 -04003090 case spv::OpBitFieldInsert:
3091 {
3092 auto insert = GenericValue(this, routine, insn.word(4)).UInt(i);
3093 auto offset = GenericValue(this, routine, insn.word(5)).UInt(0);
3094 auto count = GenericValue(this, routine, insn.word(6)).UInt(0);
3095 auto one = SIMD::UInt(1);
3096 auto v = src.UInt(i);
3097 auto mask = Bitmask32(offset + count) ^ Bitmask32(offset);
3098 dst.move(i, (v & ~mask) | ((insert << offset) & mask));
3099 break;
3100 }
Ben Claytond86db952019-04-08 13:43:11 -04003101 case spv::OpBitFieldSExtract:
3102 case spv::OpBitFieldUExtract:
3103 {
3104 auto offset = GenericValue(this, routine, insn.word(4)).UInt(0);
3105 auto count = GenericValue(this, routine, insn.word(5)).UInt(0);
3106 auto one = SIMD::UInt(1);
3107 auto v = src.UInt(i);
3108 SIMD::UInt out = (v >> offset) & Bitmask32(count);
3109 if (insn.opcode() == spv::OpBitFieldSExtract)
3110 {
3111 auto sign = out & NthBit32(count - one);
3112 auto sext = ~(sign - one);
3113 out |= sext;
3114 }
3115 dst.move(i, out);
3116 break;
3117 }
Ben Claytond2a46432019-04-08 11:41:45 -04003118 case spv::OpBitReverse:
Ben Claytone339d6c2019-04-13 16:49:39 +00003119 {
Ben Claytondb4f3df2019-04-13 16:48:33 +00003120 // TODO: Add an intrinsic to reactor. Even if there isn't a
3121 // single vector instruction, there may be target-dependent
3122 // ways to make this faster.
3123 // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
3124 SIMD::UInt v = src.UInt(i);
3125 v = ((v >> 1) & SIMD::UInt(0x55555555)) | ((v & SIMD::UInt(0x55555555)) << 1);
3126 v = ((v >> 2) & SIMD::UInt(0x33333333)) | ((v & SIMD::UInt(0x33333333)) << 2);
3127 v = ((v >> 4) & SIMD::UInt(0x0F0F0F0F)) | ((v & SIMD::UInt(0x0F0F0F0F)) << 4);
3128 v = ((v >> 8) & SIMD::UInt(0x00FF00FF)) | ((v & SIMD::UInt(0x00FF00FF)) << 8);
3129 v = (v >> 16) | (v << 16);
3130 dst.move(i, v);
Ben Claytond2a46432019-04-08 11:41:45 -04003131 break;
Ben Claytone339d6c2019-04-13 16:49:39 +00003132 }
Ben Clayton1eb017d2019-04-08 11:32:09 -04003133 case spv::OpBitCount:
Ben Claytone339d6c2019-04-13 16:49:39 +00003134 {
3135 // TODO: Add an intrinsic to reactor. Even if there isn't a
3136 // single vector instruction, there may be target-dependent
3137 // ways to make this faster.
3138 // https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
3139 auto v = src.UInt(i);
3140 SIMD::UInt c = v - ((v >> 1) & SIMD::UInt(0x55555555));
3141 c = ((c >> 2) & SIMD::UInt(0x33333333)) + (c & SIMD::UInt(0x33333333));
3142 c = ((c >> 4) + c) & SIMD::UInt(0x0F0F0F0F);
3143 c = ((c >> 8) + c) & SIMD::UInt(0x00FF00FF);
3144 c = ((c >> 16) + c) & SIMD::UInt(0x0000FFFF);
3145 dst.move(i, c);
Ben Clayton1eb017d2019-04-08 11:32:09 -04003146 break;
Ben Claytone339d6c2019-04-13 16:49:39 +00003147 }
Ben Claytondd1e37e2019-02-28 19:59:15 +00003148 case spv::OpSNegate:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003149 dst.move(i, -src.Int(i));
Ben Claytondd1e37e2019-02-28 19:59:15 +00003150 break;
3151 case spv::OpFNegate:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003152 dst.move(i, -src.Float(i));
Ben Claytondd1e37e2019-02-28 19:59:15 +00003153 break;
Chris Forbes4d503052019-03-01 17:13:57 -08003154 case spv::OpConvertFToU:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003155 dst.move(i, SIMD::UInt(src.Float(i)));
Chris Forbes4d503052019-03-01 17:13:57 -08003156 break;
3157 case spv::OpConvertFToS:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003158 dst.move(i, SIMD::Int(src.Float(i)));
Chris Forbes4d503052019-03-01 17:13:57 -08003159 break;
3160 case spv::OpConvertSToF:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003161 dst.move(i, SIMD::Float(src.Int(i)));
Chris Forbes4d503052019-03-01 17:13:57 -08003162 break;
3163 case spv::OpConvertUToF:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003164 dst.move(i, SIMD::Float(src.UInt(i)));
Chris Forbes4d503052019-03-01 17:13:57 -08003165 break;
3166 case spv::OpBitcast:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003167 dst.move(i, src.Float(i));
Chris Forbes4d503052019-03-01 17:13:57 -08003168 break;
Chris Forbes3ed33ce2019-03-07 13:38:31 -08003169 case spv::OpIsInf:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003170 dst.move(i, IsInf(src.Float(i)));
Chris Forbes3ed33ce2019-03-07 13:38:31 -08003171 break;
3172 case spv::OpIsNan:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003173 dst.move(i, IsNan(src.Float(i)));
Chris Forbes3ed33ce2019-03-07 13:38:31 -08003174 break;
Chris Forbesaff2dd02019-03-20 14:50:24 -07003175 case spv::OpDPdx:
3176 case spv::OpDPdxCoarse:
3177 // Derivative instructions: FS invocations are laid out like so:
3178 // 0 1
3179 // 2 3
3180 static_assert(SIMD::Width == 4, "All cross-lane instructions will need care when using a different width");
Nicolas Capens80c796b2019-03-19 21:38:44 -04003181 dst.move(i, SIMD::Float(Extract(src.Float(i), 1) - Extract(src.Float(i), 0)));
Chris Forbesaff2dd02019-03-20 14:50:24 -07003182 break;
3183 case spv::OpDPdy:
3184 case spv::OpDPdyCoarse:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003185 dst.move(i, SIMD::Float(Extract(src.Float(i), 2) - Extract(src.Float(i), 0)));
Chris Forbesaff2dd02019-03-20 14:50:24 -07003186 break;
3187 case spv::OpFwidth:
3188 case spv::OpFwidthCoarse:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003189 dst.move(i, SIMD::Float(Abs(Extract(src.Float(i), 1) - Extract(src.Float(i), 0))
Chris Forbesaff2dd02019-03-20 14:50:24 -07003190 + Abs(Extract(src.Float(i), 2) - Extract(src.Float(i), 0))));
3191 break;
3192 case spv::OpDPdxFine:
3193 {
3194 auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0);
3195 auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2);
3196 SIMD::Float v = SIMD::Float(firstRow);
3197 v = Insert(v, secondRow, 2);
3198 v = Insert(v, secondRow, 3);
Nicolas Capens80c796b2019-03-19 21:38:44 -04003199 dst.move(i, v);
Chris Forbesaff2dd02019-03-20 14:50:24 -07003200 break;
3201 }
3202 case spv::OpDPdyFine:
3203 {
3204 auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0);
3205 auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1);
3206 SIMD::Float v = SIMD::Float(firstColumn);
3207 v = Insert(v, secondColumn, 1);
3208 v = Insert(v, secondColumn, 3);
Nicolas Capens80c796b2019-03-19 21:38:44 -04003209 dst.move(i, v);
Chris Forbesaff2dd02019-03-20 14:50:24 -07003210 break;
3211 }
3212 case spv::OpFwidthFine:
3213 {
3214 auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0);
3215 auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2);
3216 SIMD::Float dpdx = SIMD::Float(firstRow);
3217 dpdx = Insert(dpdx, secondRow, 2);
3218 dpdx = Insert(dpdx, secondRow, 3);
3219 auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0);
3220 auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1);
3221 SIMD::Float dpdy = SIMD::Float(firstColumn);
3222 dpdy = Insert(dpdy, secondColumn, 1);
3223 dpdy = Insert(dpdy, secondColumn, 3);
Nicolas Capens80c796b2019-03-19 21:38:44 -04003224 dst.move(i, Abs(dpdx) + Abs(dpdy));
Chris Forbesaff2dd02019-03-20 14:50:24 -07003225 break;
3226 }
Ben Clayton64da4ae2019-04-19 12:34:06 -04003227 case spv::OpQuantizeToF16:
3228 {
3229 auto abs = Abs(src.Float(i));
3230 auto sign = src.Int(i) & SIMD::Int(0x80000000);
3231 auto isZero = CmpLT(abs, SIMD::Float(0.000061035));
3232 auto isInf = CmpGT(abs, SIMD::Float(65504.0f));
3233 auto isNaN = IsNan(abs);
3234 auto isInfOrNan = isInf | isNaN;
3235 SIMD::Int v = src.Int(i) & SIMD::Int(0xFFFFE000);
3236 v &= ~isZero | SIMD::Int(0x80000000);
3237 v = sign | (isInfOrNan & SIMD::Int(0x7F800000)) | (~isInfOrNan & v);
3238 v |= isNaN & SIMD::Int(0x400000);
3239 dst.move(i, v);
3240 break;
3241 }
Ben Claytondd1e37e2019-02-28 19:59:15 +00003242 default:
Ben Clayton92797c22019-04-25 10:44:03 +01003243 UNREACHABLE("%s", OpcodeName(insn.opcode()).c_str());
Ben Claytondd1e37e2019-02-28 19:59:15 +00003244 }
3245 }
Ben Claytonc0cf68b2019-03-21 17:46:08 +00003246
3247 return EmitResult::Continue;
Ben Claytondd1e37e2019-02-28 19:59:15 +00003248 }
3249
Ben Claytonc0cf68b2019-03-21 17:46:08 +00003250 SpirvShader::EmitResult SpirvShader::EmitBinaryOp(InsnIterator insn, EmitState *state) const
Ben Claytondd1e37e2019-02-28 19:59:15 +00003251 {
Ben Claytonc0cf68b2019-03-21 17:46:08 +00003252 auto routine = state->routine;
Ben Claytondd1e37e2019-02-28 19:59:15 +00003253 auto &type = getType(insn.word(1));
3254 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
Chris Forbese86b6dc2019-03-01 09:08:47 -08003255 auto &lhsType = getType(getObject(insn.word(3)).type);
Ben Clayton9b62c5e2019-03-08 09:32:34 +00003256 auto lhs = GenericValue(this, routine, insn.word(3));
3257 auto rhs = GenericValue(this, routine, insn.word(4));
Ben Claytondd1e37e2019-02-28 19:59:15 +00003258
Chris Forbese86b6dc2019-03-01 09:08:47 -08003259 for (auto i = 0u; i < lhsType.sizeInComponents; i++)
Ben Claytondd1e37e2019-02-28 19:59:15 +00003260 {
Ben Claytondd1e37e2019-02-28 19:59:15 +00003261 switch (insn.opcode())
3262 {
3263 case spv::OpIAdd:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003264 dst.move(i, lhs.Int(i) + rhs.Int(i));
Ben Claytondd1e37e2019-02-28 19:59:15 +00003265 break;
3266 case spv::OpISub:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003267 dst.move(i, lhs.Int(i) - rhs.Int(i));
Ben Claytondd1e37e2019-02-28 19:59:15 +00003268 break;
3269 case spv::OpIMul:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003270 dst.move(i, lhs.Int(i) * rhs.Int(i));
Ben Claytondd1e37e2019-02-28 19:59:15 +00003271 break;
3272 case spv::OpSDiv:
Ben Clayton49d2c132019-03-14 12:21:53 +00003273 {
Ben Claytona2749f32019-03-14 19:32:41 +00003274 SIMD::Int a = lhs.Int(i);
3275 SIMD::Int b = rhs.Int(i);
3276 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
3277 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
Nicolas Capens80c796b2019-03-19 21:38:44 -04003278 dst.move(i, a / b);
Ben Claytondd1e37e2019-02-28 19:59:15 +00003279 break;
Ben Clayton49d2c132019-03-14 12:21:53 +00003280 }
Ben Claytondd1e37e2019-02-28 19:59:15 +00003281 case spv::OpUDiv:
Ben Clayton49d2c132019-03-14 12:21:53 +00003282 {
3283 auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0)));
Nicolas Capens80c796b2019-03-19 21:38:44 -04003284 dst.move(i, lhs.UInt(i) / (rhs.UInt(i) | zeroMask));
Ben Claytondd1e37e2019-02-28 19:59:15 +00003285 break;
Ben Clayton49d2c132019-03-14 12:21:53 +00003286 }
Chris Forbes71673c82019-03-14 12:55:20 -07003287 case spv::OpSRem:
3288 {
3289 SIMD::Int a = lhs.Int(i);
3290 SIMD::Int b = rhs.Int(i);
3291 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
3292 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
Nicolas Capens80c796b2019-03-19 21:38:44 -04003293 dst.move(i, a % b);
Chris Forbes71673c82019-03-14 12:55:20 -07003294 break;
3295 }
Ben Claytonbb8c8e22019-03-08 12:04:00 +00003296 case spv::OpSMod:
3297 {
Ben Claytona2749f32019-03-14 19:32:41 +00003298 SIMD::Int a = lhs.Int(i);
3299 SIMD::Int b = rhs.Int(i);
3300 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
3301 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
3302 auto mod = a % b;
Ben Claytonbb8c8e22019-03-08 12:04:00 +00003303 // If a and b have opposite signs, the remainder operation takes
3304 // the sign from a but OpSMod is supposed to take the sign of b.
3305 // Adding b will ensure that the result has the correct sign and
3306 // that it is still congruent to a modulo b.
3307 //
3308 // See also http://mathforum.org/library/drmath/view/52343.html
3309 auto signDiff = CmpNEQ(CmpGE(a, SIMD::Int(0)), CmpGE(b, SIMD::Int(0)));
3310 auto fixedMod = mod + (b & CmpNEQ(mod, SIMD::Int(0)) & signDiff);
Nicolas Capens80c796b2019-03-19 21:38:44 -04003311 dst.move(i, As<SIMD::Float>(fixedMod));
Ben Claytonbb8c8e22019-03-08 12:04:00 +00003312 break;
3313 }
Ben Claytondd1e37e2019-02-28 19:59:15 +00003314 case spv::OpUMod:
Chris Forbes3ebf5832019-03-14 08:15:25 -07003315 {
3316 auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0)));
Nicolas Capens80c796b2019-03-19 21:38:44 -04003317 dst.move(i, lhs.UInt(i) % (rhs.UInt(i) | zeroMask));
Ben Claytondd1e37e2019-02-28 19:59:15 +00003318 break;
Chris Forbes3ebf5832019-03-14 08:15:25 -07003319 }
Ben Claytone95eeb12019-03-04 16:32:09 +00003320 case spv::OpIEqual:
Chris Forbes787b4462019-03-08 12:16:57 -08003321 case spv::OpLogicalEqual:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003322 dst.move(i, CmpEQ(lhs.Int(i), rhs.Int(i)));
Ben Claytone95eeb12019-03-04 16:32:09 +00003323 break;
3324 case spv::OpINotEqual:
Chris Forbes787b4462019-03-08 12:16:57 -08003325 case spv::OpLogicalNotEqual:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003326 dst.move(i, CmpNEQ(lhs.Int(i), rhs.Int(i)));
Ben Claytone95eeb12019-03-04 16:32:09 +00003327 break;
3328 case spv::OpUGreaterThan:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003329 dst.move(i, CmpGT(lhs.UInt(i), rhs.UInt(i)));
Ben Claytone95eeb12019-03-04 16:32:09 +00003330 break;
3331 case spv::OpSGreaterThan:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003332 dst.move(i, CmpGT(lhs.Int(i), rhs.Int(i)));
Ben Claytone95eeb12019-03-04 16:32:09 +00003333 break;
3334 case spv::OpUGreaterThanEqual:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003335 dst.move(i, CmpGE(lhs.UInt(i), rhs.UInt(i)));
Ben Claytone95eeb12019-03-04 16:32:09 +00003336 break;
3337 case spv::OpSGreaterThanEqual:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003338 dst.move(i, CmpGE(lhs.Int(i), rhs.Int(i)));
Ben Claytone95eeb12019-03-04 16:32:09 +00003339 break;
3340 case spv::OpULessThan:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003341 dst.move(i, CmpLT(lhs.UInt(i), rhs.UInt(i)));
Ben Claytone95eeb12019-03-04 16:32:09 +00003342 break;
3343 case spv::OpSLessThan:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003344 dst.move(i, CmpLT(lhs.Int(i), rhs.Int(i)));
Ben Claytone95eeb12019-03-04 16:32:09 +00003345 break;
3346 case spv::OpULessThanEqual:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003347 dst.move(i, CmpLE(lhs.UInt(i), rhs.UInt(i)));
Ben Claytone95eeb12019-03-04 16:32:09 +00003348 break;
3349 case spv::OpSLessThanEqual:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003350 dst.move(i, CmpLE(lhs.Int(i), rhs.Int(i)));
Ben Claytone95eeb12019-03-04 16:32:09 +00003351 break;
Ben Claytondd1e37e2019-02-28 19:59:15 +00003352 case spv::OpFAdd:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003353 dst.move(i, lhs.Float(i) + rhs.Float(i));
Ben Claytondd1e37e2019-02-28 19:59:15 +00003354 break;
3355 case spv::OpFSub:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003356 dst.move(i, lhs.Float(i) - rhs.Float(i));
Ben Claytondd1e37e2019-02-28 19:59:15 +00003357 break;
Chris Forbes9d931532019-03-08 09:53:03 -08003358 case spv::OpFMul:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003359 dst.move(i, lhs.Float(i) * rhs.Float(i));
Chris Forbes9d931532019-03-08 09:53:03 -08003360 break;
Ben Claytondd1e37e2019-02-28 19:59:15 +00003361 case spv::OpFDiv:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003362 dst.move(i, lhs.Float(i) / rhs.Float(i));
Ben Claytondd1e37e2019-02-28 19:59:15 +00003363 break;
Chris Forbes0e4d6ff2019-03-15 13:43:36 -07003364 case spv::OpFMod:
3365 // TODO(b/126873455): inaccurate for values greater than 2^24
Nicolas Capens80c796b2019-03-19 21:38:44 -04003366 dst.move(i, lhs.Float(i) - rhs.Float(i) * Floor(lhs.Float(i) / rhs.Float(i)));
Chris Forbes0e4d6ff2019-03-15 13:43:36 -07003367 break;
Chris Forbes1a4c7122019-03-15 14:50:47 -07003368 case spv::OpFRem:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003369 dst.move(i, lhs.Float(i) % rhs.Float(i));
Chris Forbes1a4c7122019-03-15 14:50:47 -07003370 break;
Ben Claytonec1aeb82019-03-04 19:33:27 +00003371 case spv::OpFOrdEqual:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003372 dst.move(i, CmpEQ(lhs.Float(i), rhs.Float(i)));
Ben Claytonec1aeb82019-03-04 19:33:27 +00003373 break;
3374 case spv::OpFUnordEqual:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003375 dst.move(i, CmpUEQ(lhs.Float(i), rhs.Float(i)));
Ben Claytonec1aeb82019-03-04 19:33:27 +00003376 break;
3377 case spv::OpFOrdNotEqual:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003378 dst.move(i, CmpNEQ(lhs.Float(i), rhs.Float(i)));
Ben Claytonec1aeb82019-03-04 19:33:27 +00003379 break;
3380 case spv::OpFUnordNotEqual:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003381 dst.move(i, CmpUNEQ(lhs.Float(i), rhs.Float(i)));
Ben Claytonec1aeb82019-03-04 19:33:27 +00003382 break;
3383 case spv::OpFOrdLessThan:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003384 dst.move(i, CmpLT(lhs.Float(i), rhs.Float(i)));
Ben Claytonec1aeb82019-03-04 19:33:27 +00003385 break;
3386 case spv::OpFUnordLessThan:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003387 dst.move(i, CmpULT(lhs.Float(i), rhs.Float(i)));
Ben Claytonec1aeb82019-03-04 19:33:27 +00003388 break;
3389 case spv::OpFOrdGreaterThan:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003390 dst.move(i, CmpGT(lhs.Float(i), rhs.Float(i)));
Ben Claytonec1aeb82019-03-04 19:33:27 +00003391 break;
3392 case spv::OpFUnordGreaterThan:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003393 dst.move(i, CmpUGT(lhs.Float(i), rhs.Float(i)));
Ben Claytonec1aeb82019-03-04 19:33:27 +00003394 break;
3395 case spv::OpFOrdLessThanEqual:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003396 dst.move(i, CmpLE(lhs.Float(i), rhs.Float(i)));
Ben Claytonec1aeb82019-03-04 19:33:27 +00003397 break;
3398 case spv::OpFUnordLessThanEqual:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003399 dst.move(i, CmpULE(lhs.Float(i), rhs.Float(i)));
Ben Claytonec1aeb82019-03-04 19:33:27 +00003400 break;
3401 case spv::OpFOrdGreaterThanEqual:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003402 dst.move(i, CmpGE(lhs.Float(i), rhs.Float(i)));
Ben Claytonec1aeb82019-03-04 19:33:27 +00003403 break;
3404 case spv::OpFUnordGreaterThanEqual:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003405 dst.move(i, CmpUGE(lhs.Float(i), rhs.Float(i)));
Ben Claytonec1aeb82019-03-04 19:33:27 +00003406 break;
Ben Claytondd1e37e2019-02-28 19:59:15 +00003407 case spv::OpShiftRightLogical:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003408 dst.move(i, lhs.UInt(i) >> rhs.UInt(i));
Ben Claytondd1e37e2019-02-28 19:59:15 +00003409 break;
3410 case spv::OpShiftRightArithmetic:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003411 dst.move(i, lhs.Int(i) >> rhs.Int(i));
Ben Claytondd1e37e2019-02-28 19:59:15 +00003412 break;
3413 case spv::OpShiftLeftLogical:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003414 dst.move(i, lhs.UInt(i) << rhs.UInt(i));
Ben Claytondd1e37e2019-02-28 19:59:15 +00003415 break;
3416 case spv::OpBitwiseOr:
3417 case spv::OpLogicalOr:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003418 dst.move(i, lhs.UInt(i) | rhs.UInt(i));
Ben Claytondd1e37e2019-02-28 19:59:15 +00003419 break;
3420 case spv::OpBitwiseXor:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003421 dst.move(i, lhs.UInt(i) ^ rhs.UInt(i));
Ben Claytondd1e37e2019-02-28 19:59:15 +00003422 break;
3423 case spv::OpBitwiseAnd:
3424 case spv::OpLogicalAnd:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003425 dst.move(i, lhs.UInt(i) & rhs.UInt(i));
Ben Claytondd1e37e2019-02-28 19:59:15 +00003426 break;
Chris Forbese86b6dc2019-03-01 09:08:47 -08003427 case spv::OpSMulExtended:
3428 // Extended ops: result is a structure containing two members of the same type as lhs & rhs.
3429 // In our flat view then, component i is the i'th component of the first member;
3430 // component i + N is the i'th component of the second member.
Nicolas Capens80c796b2019-03-19 21:38:44 -04003431 dst.move(i, lhs.Int(i) * rhs.Int(i));
3432 dst.move(i + lhsType.sizeInComponents, MulHigh(lhs.Int(i), rhs.Int(i)));
Chris Forbese86b6dc2019-03-01 09:08:47 -08003433 break;
3434 case spv::OpUMulExtended:
Nicolas Capens80c796b2019-03-19 21:38:44 -04003435 dst.move(i, lhs.UInt(i) * rhs.UInt(i));
3436 dst.move(i + lhsType.sizeInComponents, MulHigh(lhs.UInt(i), rhs.UInt(i)));
Chris Forbese86b6dc2019-03-01 09:08:47 -08003437 break;
Chris Forbes3e6f60b2019-05-08 17:28:10 -07003438 case spv::OpIAddCarry:
3439 dst.move(i, lhs.UInt(i) + rhs.UInt(i));
3440 dst.move(i + lhsType.sizeInComponents, CmpLT(dst.UInt(i), lhs.UInt(i)) >> 31);
3441 break;
3442 case spv::OpISubBorrow:
3443 dst.move(i, lhs.UInt(i) - rhs.UInt(i));
3444 dst.move(i + lhsType.sizeInComponents, CmpLT(lhs.UInt(i), rhs.UInt(i)) >> 31);
3445 break;
Ben Claytondd1e37e2019-02-28 19:59:15 +00003446 default:
Ben Clayton92797c22019-04-25 10:44:03 +01003447 UNREACHABLE("%s", OpcodeName(insn.opcode()).c_str());
Ben Claytondd1e37e2019-02-28 19:59:15 +00003448 }
3449 }
Ben Claytonc0cf68b2019-03-21 17:46:08 +00003450
3451 return EmitResult::Continue;
Ben Claytondd1e37e2019-02-28 19:59:15 +00003452 }
3453
Ben Claytonc0cf68b2019-03-21 17:46:08 +00003454 SpirvShader::EmitResult SpirvShader::EmitDot(InsnIterator insn, EmitState *state) const
Chris Forbes2b287cc2019-03-01 13:24:17 -08003455 {
Ben Claytonc0cf68b2019-03-21 17:46:08 +00003456 auto routine = state->routine;
Chris Forbes2b287cc2019-03-01 13:24:17 -08003457 auto &type = getType(insn.word(1));
Ben Claytonaf26cfe2019-03-21 17:32:44 +00003458 ASSERT(type.sizeInComponents == 1);
Chris Forbes2b287cc2019-03-01 13:24:17 -08003459 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
3460 auto &lhsType = getType(getObject(insn.word(3)).type);
Ben Clayton9b62c5e2019-03-08 09:32:34 +00003461 auto lhs = GenericValue(this, routine, insn.word(3));
3462 auto rhs = GenericValue(this, routine, insn.word(4));
Chris Forbes2b287cc2019-03-01 13:24:17 -08003463
Nicolas Capens80c796b2019-03-19 21:38:44 -04003464 dst.move(0, Dot(lhsType.sizeInComponents, lhs, rhs));
Ben Claytonc0cf68b2019-03-21 17:46:08 +00003465 return EmitResult::Continue;
Chris Forbes2b287cc2019-03-01 13:24:17 -08003466 }
3467
Ben Claytonc0cf68b2019-03-21 17:46:08 +00003468 SpirvShader::EmitResult SpirvShader::EmitSelect(InsnIterator insn, EmitState *state) const
Ben Claytonbf943f62019-03-05 12:57:39 +00003469 {
Ben Claytonc0cf68b2019-03-21 17:46:08 +00003470 auto routine = state->routine;
Ben Claytonbf943f62019-03-05 12:57:39 +00003471 auto &type = getType(insn.word(1));
3472 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
Ben Clayton9b62c5e2019-03-08 09:32:34 +00003473 auto cond = GenericValue(this, routine, insn.word(3));
3474 auto lhs = GenericValue(this, routine, insn.word(4));
3475 auto rhs = GenericValue(this, routine, insn.word(5));
Ben Claytonbf943f62019-03-05 12:57:39 +00003476
3477 for (auto i = 0u; i < type.sizeInComponents; i++)
3478 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003479 dst.move(i, (cond.Int(i) & lhs.Int(i)) | (~cond.Int(i) & rhs.Int(i))); // FIXME: IfThenElse()
Ben Claytonbf943f62019-03-05 12:57:39 +00003480 }
Ben Claytonc0cf68b2019-03-21 17:46:08 +00003481
3482 return EmitResult::Continue;
Ben Claytonbf943f62019-03-05 12:57:39 +00003483 }
3484
Ben Claytonc0cf68b2019-03-21 17:46:08 +00003485 SpirvShader::EmitResult SpirvShader::EmitExtendedInstruction(InsnIterator insn, EmitState *state) const
Chris Forbes9667a5b2019-03-07 09:26:48 -08003486 {
Ben Claytonc0cf68b2019-03-21 17:46:08 +00003487 auto routine = state->routine;
Chris Forbes9667a5b2019-03-07 09:26:48 -08003488 auto &type = getType(insn.word(1));
3489 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
3490 auto extInstIndex = static_cast<GLSLstd450>(insn.word(4));
3491
3492 switch (extInstIndex)
3493 {
3494 case GLSLstd450FAbs:
3495 {
3496 auto src = GenericValue(this, routine, insn.word(5));
3497 for (auto i = 0u; i < type.sizeInComponents; i++)
3498 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003499 dst.move(i, Abs(src.Float(i)));
Chris Forbes9667a5b2019-03-07 09:26:48 -08003500 }
3501 break;
3502 }
3503 case GLSLstd450SAbs:
3504 {
3505 auto src = GenericValue(this, routine, insn.word(5));
3506 for (auto i = 0u; i < type.sizeInComponents; i++)
3507 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003508 dst.move(i, Abs(src.Int(i)));
Chris Forbes9667a5b2019-03-07 09:26:48 -08003509 }
3510 break;
3511 }
Chris Forbes15dff362019-03-08 11:31:31 -08003512 case GLSLstd450Cross:
3513 {
3514 auto lhs = GenericValue(this, routine, insn.word(5));
3515 auto rhs = GenericValue(this, routine, insn.word(6));
Nicolas Capens80c796b2019-03-19 21:38:44 -04003516 dst.move(0, lhs.Float(1) * rhs.Float(2) - rhs.Float(1) * lhs.Float(2));
3517 dst.move(1, lhs.Float(2) * rhs.Float(0) - rhs.Float(2) * lhs.Float(0));
3518 dst.move(2, lhs.Float(0) * rhs.Float(1) - rhs.Float(0) * lhs.Float(1));
Chris Forbes15dff362019-03-08 11:31:31 -08003519 break;
3520 }
Chris Forbesc212bbd2019-03-08 12:02:27 -08003521 case GLSLstd450Floor:
3522 {
3523 auto src = GenericValue(this, routine, insn.word(5));
3524 for (auto i = 0u; i < type.sizeInComponents; i++)
3525 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003526 dst.move(i, Floor(src.Float(i)));
Chris Forbesc212bbd2019-03-08 12:02:27 -08003527 }
3528 break;
3529 }
Chris Forbesdd172cc2019-03-08 13:36:40 -08003530 case GLSLstd450Trunc:
3531 {
3532 auto src = GenericValue(this, routine, insn.word(5));
3533 for (auto i = 0u; i < type.sizeInComponents; i++)
3534 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003535 dst.move(i, Trunc(src.Float(i)));
Chris Forbesdd172cc2019-03-08 13:36:40 -08003536 }
3537 break;
3538 }
3539 case GLSLstd450Ceil:
3540 {
3541 auto src = GenericValue(this, routine, insn.word(5));
3542 for (auto i = 0u; i < type.sizeInComponents; i++)
3543 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003544 dst.move(i, Ceil(src.Float(i)));
Chris Forbesdd172cc2019-03-08 13:36:40 -08003545 }
3546 break;
3547 }
3548 case GLSLstd450Fract:
3549 {
3550 auto src = GenericValue(this, routine, insn.word(5));
3551 for (auto i = 0u; i < type.sizeInComponents; i++)
3552 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003553 dst.move(i, Frac(src.Float(i)));
Chris Forbesdd172cc2019-03-08 13:36:40 -08003554 }
3555 break;
3556 }
3557 case GLSLstd450Round:
3558 {
3559 auto src = GenericValue(this, routine, insn.word(5));
3560 for (auto i = 0u; i < type.sizeInComponents; i++)
3561 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003562 dst.move(i, Round(src.Float(i)));
Chris Forbesdd172cc2019-03-08 13:36:40 -08003563 }
3564 break;
3565 }
3566 case GLSLstd450RoundEven:
3567 {
3568 auto src = GenericValue(this, routine, insn.word(5));
3569 for (auto i = 0u; i < type.sizeInComponents; i++)
3570 {
Ben Clayton9b62c5e2019-03-08 09:32:34 +00003571 auto x = Round(src.Float(i));
Chris Forbesdd172cc2019-03-08 13:36:40 -08003572 // dst = round(src) + ((round(src) < src) * 2 - 1) * (fract(src) == 0.5) * isOdd(round(src));
Nicolas Capens80c796b2019-03-19 21:38:44 -04003573 dst.move(i, x + ((SIMD::Float(CmpLT(x, src.Float(i)) & SIMD::Int(1)) * SIMD::Float(2.0f)) - SIMD::Float(1.0f)) *
Ben Clayton9b62c5e2019-03-08 09:32:34 +00003574 SIMD::Float(CmpEQ(Frac(src.Float(i)), SIMD::Float(0.5f)) & SIMD::Int(1)) * SIMD::Float(Int4(x) & SIMD::Int(1)));
Chris Forbesdd172cc2019-03-08 13:36:40 -08003575 }
3576 break;
3577 }
Chris Forbesdb170772019-03-08 14:50:44 -08003578 case GLSLstd450FMin:
3579 {
3580 auto lhs = GenericValue(this, routine, insn.word(5));
3581 auto rhs = GenericValue(this, routine, insn.word(6));
3582 for (auto i = 0u; i < type.sizeInComponents; i++)
3583 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003584 dst.move(i, Min(lhs.Float(i), rhs.Float(i)));
Chris Forbesdb170772019-03-08 14:50:44 -08003585 }
3586 break;
3587 }
3588 case GLSLstd450FMax:
3589 {
3590 auto lhs = GenericValue(this, routine, insn.word(5));
3591 auto rhs = GenericValue(this, routine, insn.word(6));
3592 for (auto i = 0u; i < type.sizeInComponents; i++)
3593 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003594 dst.move(i, Max(lhs.Float(i), rhs.Float(i)));
Chris Forbesdb170772019-03-08 14:50:44 -08003595 }
3596 break;
3597 }
3598 case GLSLstd450SMin:
3599 {
3600 auto lhs = GenericValue(this, routine, insn.word(5));
3601 auto rhs = GenericValue(this, routine, insn.word(6));
3602 for (auto i = 0u; i < type.sizeInComponents; i++)
3603 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003604 dst.move(i, Min(lhs.Int(i), rhs.Int(i)));
Chris Forbesdb170772019-03-08 14:50:44 -08003605 }
3606 break;
3607 }
3608 case GLSLstd450SMax:
3609 {
3610 auto lhs = GenericValue(this, routine, insn.word(5));
3611 auto rhs = GenericValue(this, routine, insn.word(6));
3612 for (auto i = 0u; i < type.sizeInComponents; i++)
3613 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003614 dst.move(i, Max(lhs.Int(i), rhs.Int(i)));
Chris Forbesdb170772019-03-08 14:50:44 -08003615 }
3616 break;
3617 }
3618 case GLSLstd450UMin:
3619 {
3620 auto lhs = GenericValue(this, routine, insn.word(5));
3621 auto rhs = GenericValue(this, routine, insn.word(6));
3622 for (auto i = 0u; i < type.sizeInComponents; i++)
3623 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003624 dst.move(i, Min(lhs.UInt(i), rhs.UInt(i)));
Chris Forbesdb170772019-03-08 14:50:44 -08003625 }
3626 break;
3627 }
3628 case GLSLstd450UMax:
3629 {
3630 auto lhs = GenericValue(this, routine, insn.word(5));
3631 auto rhs = GenericValue(this, routine, insn.word(6));
3632 for (auto i = 0u; i < type.sizeInComponents; i++)
3633 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003634 dst.move(i, Max(lhs.UInt(i), rhs.UInt(i)));
Chris Forbesdb170772019-03-08 14:50:44 -08003635 }
3636 break;
3637 }
3638 case GLSLstd450Step:
3639 {
3640 auto edge = GenericValue(this, routine, insn.word(5));
3641 auto x = GenericValue(this, routine, insn.word(6));
3642 for (auto i = 0u; i < type.sizeInComponents; i++)
3643 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003644 dst.move(i, CmpNLT(x.Float(i), edge.Float(i)) & As<SIMD::Int>(SIMD::Float(1.0f)));
Chris Forbesdb170772019-03-08 14:50:44 -08003645 }
3646 break;
3647 }
3648 case GLSLstd450SmoothStep:
3649 {
3650 auto edge0 = GenericValue(this, routine, insn.word(5));
3651 auto edge1 = GenericValue(this, routine, insn.word(6));
3652 auto x = GenericValue(this, routine, insn.word(7));
3653 for (auto i = 0u; i < type.sizeInComponents; i++)
3654 {
Ben Clayton9b62c5e2019-03-08 09:32:34 +00003655 auto tx = Min(Max((x.Float(i) - edge0.Float(i)) /
3656 (edge1.Float(i) - edge0.Float(i)), SIMD::Float(0.0f)), SIMD::Float(1.0f));
Nicolas Capens80c796b2019-03-19 21:38:44 -04003657 dst.move(i, tx * tx * (Float4(3.0f) - Float4(2.0f) * tx));
Chris Forbesdb170772019-03-08 14:50:44 -08003658 }
3659 break;
3660 }
3661 case GLSLstd450FMix:
3662 {
3663 auto x = GenericValue(this, routine, insn.word(5));
3664 auto y = GenericValue(this, routine, insn.word(6));
3665 auto a = GenericValue(this, routine, insn.word(7));
3666 for (auto i = 0u; i < type.sizeInComponents; i++)
3667 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003668 dst.move(i, a.Float(i) * (y.Float(i) - x.Float(i)) + x.Float(i));
Chris Forbesdb170772019-03-08 14:50:44 -08003669 }
3670 break;
3671 }
3672 case GLSLstd450FClamp:
3673 {
3674 auto x = GenericValue(this, routine, insn.word(5));
3675 auto minVal = GenericValue(this, routine, insn.word(6));
3676 auto maxVal = GenericValue(this, routine, insn.word(7));
3677 for (auto i = 0u; i < type.sizeInComponents; i++)
3678 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003679 dst.move(i, Min(Max(x.Float(i), minVal.Float(i)), maxVal.Float(i)));
Chris Forbesdb170772019-03-08 14:50:44 -08003680 }
3681 break;
3682 }
3683 case GLSLstd450SClamp:
3684 {
3685 auto x = GenericValue(this, routine, insn.word(5));
3686 auto minVal = GenericValue(this, routine, insn.word(6));
3687 auto maxVal = GenericValue(this, routine, insn.word(7));
3688 for (auto i = 0u; i < type.sizeInComponents; i++)
3689 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003690 dst.move(i, Min(Max(x.Int(i), minVal.Int(i)), maxVal.Int(i)));
Chris Forbesdb170772019-03-08 14:50:44 -08003691 }
3692 break;
3693 }
3694 case GLSLstd450UClamp:
3695 {
3696 auto x = GenericValue(this, routine, insn.word(5));
3697 auto minVal = GenericValue(this, routine, insn.word(6));
3698 auto maxVal = GenericValue(this, routine, insn.word(7));
3699 for (auto i = 0u; i < type.sizeInComponents; i++)
3700 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003701 dst.move(i, Min(Max(x.UInt(i), minVal.UInt(i)), maxVal.UInt(i)));
Chris Forbesdb170772019-03-08 14:50:44 -08003702 }
3703 break;
3704 }
3705 case GLSLstd450FSign:
3706 {
3707 auto src = GenericValue(this, routine, insn.word(5));
3708 for (auto i = 0u; i < type.sizeInComponents; i++)
3709 {
Ben Clayton9b62c5e2019-03-08 09:32:34 +00003710 auto neg = As<SIMD::Int>(CmpLT(src.Float(i), SIMD::Float(-0.0f))) & As<SIMD::Int>(SIMD::Float(-1.0f));
3711 auto pos = As<SIMD::Int>(CmpNLE(src.Float(i), SIMD::Float(+0.0f))) & As<SIMD::Int>(SIMD::Float(1.0f));
Nicolas Capens80c796b2019-03-19 21:38:44 -04003712 dst.move(i, neg | pos);
Chris Forbesdb170772019-03-08 14:50:44 -08003713 }
3714 break;
3715 }
3716 case GLSLstd450SSign:
3717 {
3718 auto src = GenericValue(this, routine, insn.word(5));
3719 for (auto i = 0u; i < type.sizeInComponents; i++)
3720 {
Ben Clayton9b62c5e2019-03-08 09:32:34 +00003721 auto neg = CmpLT(src.Int(i), SIMD::Int(0)) & SIMD::Int(-1);
3722 auto pos = CmpNLE(src.Int(i), SIMD::Int(0)) & SIMD::Int(1);
Nicolas Capens80c796b2019-03-19 21:38:44 -04003723 dst.move(i, neg | pos);
Chris Forbesdb170772019-03-08 14:50:44 -08003724 }
3725 break;
3726 }
Chris Forbes868ed902019-03-13 17:39:45 -07003727 case GLSLstd450Reflect:
3728 {
3729 auto I = GenericValue(this, routine, insn.word(5));
3730 auto N = GenericValue(this, routine, insn.word(6));
3731
3732 SIMD::Float d = Dot(type.sizeInComponents, I, N);
3733
3734 for (auto i = 0u; i < type.sizeInComponents; i++)
3735 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003736 dst.move(i, I.Float(i) - SIMD::Float(2.0f) * d * N.Float(i));
Chris Forbes868ed902019-03-13 17:39:45 -07003737 }
3738 break;
3739 }
3740 case GLSLstd450Refract:
3741 {
3742 auto I = GenericValue(this, routine, insn.word(5));
3743 auto N = GenericValue(this, routine, insn.word(6));
3744 auto eta = GenericValue(this, routine, insn.word(7));
3745
3746 SIMD::Float d = Dot(type.sizeInComponents, I, N);
Ben Clayton9b62c5e2019-03-08 09:32:34 +00003747 SIMD::Float k = SIMD::Float(1.0f) - eta.Float(0) * eta.Float(0) * (SIMD::Float(1.0f) - d * d);
Chris Forbes868ed902019-03-13 17:39:45 -07003748 SIMD::Int pos = CmpNLT(k, SIMD::Float(0.0f));
Ben Clayton9b62c5e2019-03-08 09:32:34 +00003749 SIMD::Float t = (eta.Float(0) * d + Sqrt(k));
Chris Forbes868ed902019-03-13 17:39:45 -07003750
3751 for (auto i = 0u; i < type.sizeInComponents; i++)
3752 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003753 dst.move(i, pos & As<SIMD::Int>(eta.Float(0) * I.Float(i) - t * N.Float(i)));
Chris Forbes868ed902019-03-13 17:39:45 -07003754 }
3755 break;
3756 }
3757 case GLSLstd450FaceForward:
3758 {
3759 auto N = GenericValue(this, routine, insn.word(5));
3760 auto I = GenericValue(this, routine, insn.word(6));
3761 auto Nref = GenericValue(this, routine, insn.word(7));
3762
3763 SIMD::Float d = Dot(type.sizeInComponents, I, Nref);
3764 SIMD::Int neg = CmpLT(d, SIMD::Float(0.0f));
3765
3766 for (auto i = 0u; i < type.sizeInComponents; i++)
3767 {
Ben Clayton9b62c5e2019-03-08 09:32:34 +00003768 auto n = N.Float(i);
Nicolas Capens80c796b2019-03-19 21:38:44 -04003769 dst.move(i, (neg & As<SIMD::Int>(n)) | (~neg & As<SIMD::Int>(-n)));
Chris Forbes868ed902019-03-13 17:39:45 -07003770 }
3771 break;
3772 }
3773 case GLSLstd450Length:
3774 {
3775 auto x = GenericValue(this, routine, insn.word(5));
3776 SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x);
3777
Nicolas Capens80c796b2019-03-19 21:38:44 -04003778 dst.move(0, Sqrt(d));
Chris Forbes868ed902019-03-13 17:39:45 -07003779 break;
3780 }
3781 case GLSLstd450Normalize:
3782 {
3783 auto x = GenericValue(this, routine, insn.word(5));
3784 SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x);
3785 SIMD::Float invLength = SIMD::Float(1.0f) / Sqrt(d);
3786
3787 for (auto i = 0u; i < type.sizeInComponents; i++)
3788 {
Nicolas Capens80c796b2019-03-19 21:38:44 -04003789 dst.move(i, invLength * x.Float(i));
Chris Forbes868ed902019-03-13 17:39:45 -07003790 }
3791 break;
3792 }
3793 case GLSLstd450Distance:
3794 {
3795 auto p0 = GenericValue(this, routine, insn.word(5));
3796 auto p1 = GenericValue(this, routine, insn.word(6));
Ben Clayton2cbf4f92019-04-08 16:19:30 -04003797 auto p0Type = getType(p0.type);
Chris Forbes868ed902019-03-13 17:39:45 -07003798
3799 // sqrt(dot(p0-p1, p0-p1))
Ben Clayton9b62c5e2019-03-08 09:32:34 +00003800 SIMD::Float d = (p0.Float(0) - p1.Float(0)) * (p0.Float(0) - p1.Float(0));
Chris Forbes868ed902019-03-13 17:39:45 -07003801
3802 for (auto i = 1u; i < p0Type.sizeInComponents; i++)
3803 {
Ben Clayton9b62c5e2019-03-08 09:32:34 +00003804 d += (p0.Float(i) - p1.Float(i)) * (p0.Float(i) - p1.Float(i));
Chris Forbes868ed902019-03-13 17:39:45 -07003805 }
3806
Nicolas Capens80c796b2019-03-19 21:38:44 -04003807 dst.move(0, Sqrt(d));
Chris Forbes868ed902019-03-13 17:39:45 -07003808 break;
3809 }
Ben Clayton2cbf4f92019-04-08 16:19:30 -04003810 case GLSLstd450Modf:
3811 {
3812 auto val = GenericValue(this, routine, insn.word(5));
3813 auto ptrId = Object::ID(insn.word(6));
3814 auto ptrTy = getType(getObject(ptrId).type);
3815 auto ptr = GetPointerToData(ptrId, 0, routine);
3816 bool interleavedByLane = IsStorageInterleavedByLane(ptrTy.storageClass);
3817
3818 for (auto i = 0u; i < type.sizeInComponents; i++)
3819 {
3820 auto whole = Floor(val.Float(i));
3821 auto frac = Frac(val.Float(i));
3822
3823 dst.move(i, frac);
3824
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04003825 auto p = ptr + (i * sizeof(float));
3826 if (interleavedByLane) { p = interleaveByLane(p); }
3827 SIMD::Store(p, whole, state->activeLaneMask());
Ben Clayton2cbf4f92019-04-08 16:19:30 -04003828 }
3829 break;
3830 }
Chris Forbesfde26fe2019-04-08 14:56:52 -07003831 case GLSLstd450ModfStruct:
3832 {
3833 auto val = GenericValue(this, routine, insn.word(5));
3834 auto valTy = getType(val.type);
3835
3836 for (auto i = 0u; i < valTy.sizeInComponents; i++)
3837 {
3838 auto whole = Floor(val.Float(i));
3839 auto frac = Frac(val.Float(i));
3840
3841 dst.move(i, frac);
3842 dst.move(i + valTy.sizeInComponents, whole);
3843 }
3844 break;
3845 }
Chris Forbes50e64932019-04-08 17:49:27 -07003846 case GLSLstd450PackSnorm4x8:
3847 {
3848 auto val = GenericValue(this, routine, insn.word(5));
3849 dst.move(0, (SIMD::Int(Round(Min(Max(val.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
3850 SIMD::Int(0xFF)) |
3851 ((SIMD::Int(Round(Min(Max(val.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
3852 SIMD::Int(0xFF)) << 8) |
3853 ((SIMD::Int(Round(Min(Max(val.Float(2), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
3854 SIMD::Int(0xFF)) << 16) |
3855 ((SIMD::Int(Round(Min(Max(val.Float(3), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
3856 SIMD::Int(0xFF)) << 24));
3857 break;
3858 }
3859 case GLSLstd450PackUnorm4x8:
3860 {
3861 auto val = GenericValue(this, routine, insn.word(5));
3862 dst.move(0, (SIMD::UInt(Round(Min(Max(val.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
3863 ((SIMD::UInt(Round(Min(Max(val.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
3864 ((SIMD::UInt(Round(Min(Max(val.Float(2), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
3865 ((SIMD::UInt(Round(Min(Max(val.Float(3), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24));
3866 break;
3867 }
3868 case GLSLstd450PackSnorm2x16:
3869 {
3870 auto val = GenericValue(this, routine, insn.word(5));
3871 dst.move(0, (SIMD::Int(Round(Min(Max(val.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(32767.0f))) &
3872 SIMD::Int(0xFFFF)) |
3873 ((SIMD::Int(Round(Min(Max(val.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(32767.0f))) &
3874 SIMD::Int(0xFFFF)) << 16));
3875 break;
3876 }
3877 case GLSLstd450PackUnorm2x16:
3878 {
3879 auto val = GenericValue(this, routine, insn.word(5));
3880 dst.move(0, (SIMD::UInt(Round(Min(Max(val.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(65535.0f))) &
3881 SIMD::UInt(0xFFFF)) |
3882 ((SIMD::UInt(Round(Min(Max(val.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(65535.0f))) &
3883 SIMD::UInt(0xFFFF)) << 16));
3884 break;
3885 }
3886 case GLSLstd450PackHalf2x16:
3887 {
3888 auto val = GenericValue(this, routine, insn.word(5));
3889 dst.move(0, FloatToHalfBits(val.UInt(0), false) | FloatToHalfBits(val.UInt(1), true));
3890 break;
3891 }
3892 case GLSLstd450UnpackSnorm4x8:
3893 {
3894 auto val = GenericValue(this, routine, insn.word(5));
3895 dst.move(0, Min(Max(SIMD::Float(((val.Int(0)<<24) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
3896 dst.move(1, Min(Max(SIMD::Float(((val.Int(0)<<16) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
3897 dst.move(2, Min(Max(SIMD::Float(((val.Int(0)<<8) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
3898 dst.move(3, Min(Max(SIMD::Float(((val.Int(0)) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
3899 break;
3900 }
3901 case GLSLstd450UnpackUnorm4x8:
3902 {
3903 auto val = GenericValue(this, routine, insn.word(5));
3904 dst.move(0, SIMD::Float((val.UInt(0) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
3905 dst.move(1, SIMD::Float(((val.UInt(0)>>8) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
3906 dst.move(2, SIMD::Float(((val.UInt(0)>>16) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
3907 dst.move(3, SIMD::Float(((val.UInt(0)>>24) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
3908 break;
3909 }
3910 case GLSLstd450UnpackSnorm2x16:
3911 {
3912 auto val = GenericValue(this, routine, insn.word(5));
3913 // clamp(f / 32767.0, -1.0, 1.0)
3914 dst.move(0, Min(Max(SIMD::Float(As<SIMD::Int>((val.UInt(0) & SIMD::UInt(0x0000FFFF)) << 16)) *
3915 SIMD::Float(1.0f / float(0x7FFF0000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
3916 dst.move(1, Min(Max(SIMD::Float(As<SIMD::Int>(val.UInt(0) & SIMD::UInt(0xFFFF0000))) * SIMD::Float(1.0f / float(0x7FFF0000)),
3917 SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
3918 break;
3919 }
3920 case GLSLstd450UnpackUnorm2x16:
3921 {
3922 auto val = GenericValue(this, routine, insn.word(5));
3923 // f / 65535.0
3924 dst.move(0, SIMD::Float((val.UInt(0) & SIMD::UInt(0x0000FFFF)) << 16) * SIMD::Float(1.0f / float(0xFFFF0000)));
3925 dst.move(1, SIMD::Float(val.UInt(0) & SIMD::UInt(0xFFFF0000)) * SIMD::Float(1.0f / float(0xFFFF0000)));
3926 break;
3927 }
3928 case GLSLstd450UnpackHalf2x16:
3929 {
3930 auto val = GenericValue(this, routine, insn.word(5));
Chris Forbesd3546952019-04-30 19:32:19 -07003931 dst.move(0, halfToFloatBits(val.UInt(0) & SIMD::UInt(0x0000FFFF)));
3932 dst.move(1, halfToFloatBits((val.UInt(0) & SIMD::UInt(0xFFFF0000)) >> 16));
Chris Forbes50e64932019-04-08 17:49:27 -07003933 break;
3934 }
Ben Clayton6caf8212019-04-09 11:28:39 -04003935 case GLSLstd450Fma:
3936 {
3937 auto a = GenericValue(this, routine, insn.word(5));
3938 auto b = GenericValue(this, routine, insn.word(6));
3939 auto c = GenericValue(this, routine, insn.word(7));
3940 for (auto i = 0u; i < type.sizeInComponents; i++)
3941 {
3942 dst.move(i, FMA(a.Float(i), b.Float(i), c.Float(i)));
3943 }
3944 break;
3945 }
Ben Claytonfc77af12019-04-09 10:48:00 -04003946 case GLSLstd450Frexp:
3947 {
3948 auto val = GenericValue(this, routine, insn.word(5));
3949 auto ptrId = Object::ID(insn.word(6));
3950 auto ptrTy = getType(getObject(ptrId).type);
3951 auto ptr = GetPointerToData(ptrId, 0, routine);
3952 bool interleavedByLane = IsStorageInterleavedByLane(ptrTy.storageClass);
3953
3954 for (auto i = 0u; i < type.sizeInComponents; i++)
3955 {
3956 SIMD::Float significand;
3957 SIMD::Int exponent;
3958 std::tie(significand, exponent) = Frexp(val.Float(i));
3959
3960 dst.move(i, significand);
3961
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04003962 auto p = ptr + (i * sizeof(float));
3963 if (interleavedByLane) { p = interleaveByLane(p); }
3964 SIMD::Store(p, exponent, state->activeLaneMask());
Ben Claytonfc77af12019-04-09 10:48:00 -04003965 }
3966 break;
3967 }
3968 case GLSLstd450FrexpStruct:
3969 {
3970 auto val = GenericValue(this, routine, insn.word(5));
3971 auto numComponents = getType(val.type).sizeInComponents;
3972 for (auto i = 0u; i < numComponents; i++)
3973 {
3974 auto significandAndExponent = Frexp(val.Float(i));
3975 dst.move(i, significandAndExponent.first);
3976 dst.move(i + numComponents, significandAndExponent.second);
3977 }
3978 break;
3979 }
Ben Clayton20f6ba82019-04-09 12:07:29 -04003980 case GLSLstd450Ldexp:
3981 {
3982 auto significand = GenericValue(this, routine, insn.word(5));
3983 auto exponent = GenericValue(this, routine, insn.word(6));
3984 for (auto i = 0u; i < type.sizeInComponents; i++)
3985 {
3986 // Assumes IEEE 754
3987 auto significandExponent = Exponent(significand.Float(i));
3988 auto combinedExponent = exponent.Int(i) + significandExponent;
3989 SIMD::UInt v = (significand.UInt(i) & SIMD::UInt(0x807FFFFF)) |
3990 (SIMD::UInt(combinedExponent + SIMD::Int(126)) << SIMD::UInt(23));
3991 dst.move(i, As<SIMD::Float>(v));
3992 }
3993 break;
3994 }
Ben Claytona15fcf42019-04-09 13:04:51 -04003995 case GLSLstd450Radians:
3996 {
3997 auto degrees = GenericValue(this, routine, insn.word(5));
3998 for (auto i = 0u; i < type.sizeInComponents; i++)
3999 {
4000 dst.move(i, degrees.Float(i) * SIMD::Float(PI / 180.0f));
4001 }
4002 break;
4003 }
Ben Clayton251bc282019-04-09 13:05:52 -04004004 case GLSLstd450Degrees:
4005 {
4006 auto radians = GenericValue(this, routine, insn.word(5));
4007 for (auto i = 0u; i < type.sizeInComponents; i++)
4008 {
4009 dst.move(i, radians.Float(i) * SIMD::Float(180.0f / PI));
4010 }
4011 break;
4012 }
Ben Claytona2c8b772019-04-09 13:42:36 -04004013 case GLSLstd450Sin:
4014 {
4015 auto radians = GenericValue(this, routine, insn.word(5));
4016 for (auto i = 0u; i < type.sizeInComponents; i++)
4017 {
4018 dst.move(i, Sin(radians.Float(i)));
4019 }
4020 break;
4021 }
Ben Clayton1b6f8c72019-04-09 13:47:43 -04004022 case GLSLstd450Cos:
4023 {
4024 auto radians = GenericValue(this, routine, insn.word(5));
4025 for (auto i = 0u; i < type.sizeInComponents; i++)
4026 {
4027 dst.move(i, Cos(radians.Float(i)));
4028 }
4029 break;
4030 }
Ben Clayton14740062019-04-09 13:48:41 -04004031 case GLSLstd450Tan:
4032 {
4033 auto radians = GenericValue(this, routine, insn.word(5));
4034 for (auto i = 0u; i < type.sizeInComponents; i++)
4035 {
4036 dst.move(i, Tan(radians.Float(i)));
4037 }
4038 break;
4039 }
Ben Claytonf9350d72019-04-09 14:19:02 -04004040 case GLSLstd450Asin:
4041 {
4042 auto val = GenericValue(this, routine, insn.word(5));
4043 for (auto i = 0u; i < type.sizeInComponents; i++)
4044 {
4045 dst.move(i, Asin(val.Float(i)));
4046 }
4047 break;
4048 }
Ben Claytoneafae472019-04-09 14:22:38 -04004049 case GLSLstd450Acos:
4050 {
4051 auto val = GenericValue(this, routine, insn.word(5));
4052 for (auto i = 0u; i < type.sizeInComponents; i++)
4053 {
4054 dst.move(i, Acos(val.Float(i)));
4055 }
4056 break;
4057 }
Ben Clayton749b4e02019-04-09 14:27:43 -04004058 case GLSLstd450Atan:
4059 {
4060 auto val = GenericValue(this, routine, insn.word(5));
4061 for (auto i = 0u; i < type.sizeInComponents; i++)
4062 {
4063 dst.move(i, Atan(val.Float(i)));
4064 }
4065 break;
4066 }
Ben Claytond9636972019-04-09 15:09:54 -04004067 case GLSLstd450Sinh:
4068 {
4069 auto val = GenericValue(this, routine, insn.word(5));
4070 for (auto i = 0u; i < type.sizeInComponents; i++)
4071 {
4072 dst.move(i, Sinh(val.Float(i)));
4073 }
4074 break;
4075 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004076 case GLSLstd450Cosh:
4077 {
Ben Clayton900ea2c2019-04-09 15:25:36 -04004078 auto val = GenericValue(this, routine, insn.word(5));
4079 for (auto i = 0u; i < type.sizeInComponents; i++)
4080 {
4081 dst.move(i, Cosh(val.Float(i)));
4082 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004083 break;
4084 }
4085 case GLSLstd450Tanh:
4086 {
Ben Clayton3928bd92019-04-09 15:27:41 -04004087 auto val = GenericValue(this, routine, insn.word(5));
4088 for (auto i = 0u; i < type.sizeInComponents; i++)
4089 {
4090 dst.move(i, Tanh(val.Float(i)));
4091 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004092 break;
4093 }
4094 case GLSLstd450Asinh:
4095 {
Ben Claytonf6d77ab2019-04-09 15:30:04 -04004096 auto val = GenericValue(this, routine, insn.word(5));
4097 for (auto i = 0u; i < type.sizeInComponents; i++)
4098 {
4099 dst.move(i, Asinh(val.Float(i)));
4100 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004101 break;
4102 }
4103 case GLSLstd450Acosh:
4104 {
Ben Clayton28ebcb02019-04-09 15:33:38 -04004105 auto val = GenericValue(this, routine, insn.word(5));
4106 for (auto i = 0u; i < type.sizeInComponents; i++)
4107 {
4108 dst.move(i, Acosh(val.Float(i)));
4109 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004110 break;
4111 }
4112 case GLSLstd450Atanh:
4113 {
Ben Claytonfa6a5392019-04-09 15:35:24 -04004114 auto val = GenericValue(this, routine, insn.word(5));
4115 for (auto i = 0u; i < type.sizeInComponents; i++)
4116 {
4117 dst.move(i, Atanh(val.Float(i)));
4118 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004119 break;
4120 }
4121 case GLSLstd450Atan2:
4122 {
Ben Claytona520c3e2019-04-09 15:43:45 -04004123 auto x = GenericValue(this, routine, insn.word(5));
4124 auto y = GenericValue(this, routine, insn.word(6));
4125 for (auto i = 0u; i < type.sizeInComponents; i++)
4126 {
4127 dst.move(i, Atan2(x.Float(i), y.Float(i)));
4128 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004129 break;
4130 }
4131 case GLSLstd450Pow:
4132 {
Ben Claytonbfe94f02019-04-09 15:52:12 -04004133 auto x = GenericValue(this, routine, insn.word(5));
4134 auto y = GenericValue(this, routine, insn.word(6));
4135 for (auto i = 0u; i < type.sizeInComponents; i++)
4136 {
4137 dst.move(i, Pow(x.Float(i), y.Float(i)));
4138 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004139 break;
4140 }
4141 case GLSLstd450Exp:
4142 {
Ben Clayton242f0022019-04-09 16:00:53 -04004143 auto val = GenericValue(this, routine, insn.word(5));
4144 for (auto i = 0u; i < type.sizeInComponents; i++)
4145 {
4146 dst.move(i, Exp(val.Float(i)));
4147 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004148 break;
4149 }
4150 case GLSLstd450Log:
4151 {
Ben Clayton2c1da722019-04-09 16:03:03 -04004152 auto val = GenericValue(this, routine, insn.word(5));
4153 for (auto i = 0u; i < type.sizeInComponents; i++)
4154 {
4155 dst.move(i, Log(val.Float(i)));
4156 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004157 break;
4158 }
4159 case GLSLstd450Exp2:
4160 {
Ben Claytonf40b56c2019-04-09 16:06:55 -04004161 auto val = GenericValue(this, routine, insn.word(5));
4162 for (auto i = 0u; i < type.sizeInComponents; i++)
4163 {
4164 dst.move(i, Exp2(val.Float(i)));
4165 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004166 break;
4167 }
4168 case GLSLstd450Log2:
4169 {
Ben Claytone17acfe2019-04-09 16:09:13 -04004170 auto val = GenericValue(this, routine, insn.word(5));
4171 for (auto i = 0u; i < type.sizeInComponents; i++)
4172 {
4173 dst.move(i, Log2(val.Float(i)));
4174 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004175 break;
4176 }
4177 case GLSLstd450Sqrt:
4178 {
Ben Clayton6517ad22019-04-09 16:11:40 -04004179 auto val = GenericValue(this, routine, insn.word(5));
4180 for (auto i = 0u; i < type.sizeInComponents; i++)
4181 {
4182 dst.move(i, Sqrt(val.Float(i)));
4183 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004184 break;
4185 }
4186 case GLSLstd450InverseSqrt:
4187 {
Ben Clayton93451852019-04-09 16:25:30 -04004188 auto val = GenericValue(this, routine, insn.word(5));
4189 Decorations d;
4190 ApplyDecorationsForId(&d, insn.word(5));
4191 if (d.RelaxedPrecision)
4192 {
4193 for (auto i = 0u; i < type.sizeInComponents; i++)
4194 {
4195 dst.move(i, RcpSqrt_pp(val.Float(i)));
4196 }
4197 }
4198 else
4199 {
4200 for (auto i = 0u; i < type.sizeInComponents; i++)
4201 {
4202 dst.move(i, SIMD::Float(1.0f) / Sqrt(val.Float(i)));
4203 }
4204 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004205 break;
4206 }
4207 case GLSLstd450Determinant:
4208 {
Ben Clayton1fb633c2019-04-09 17:24:59 -04004209 auto mat = GenericValue(this, routine, insn.word(5));
4210 auto numComponents = getType(mat.type).sizeInComponents;
4211 switch (numComponents)
4212 {
4213 case 4: // 2x2
4214 dst.move(0, Determinant(
4215 mat.Float(0), mat.Float(1),
4216 mat.Float(2), mat.Float(3)));
4217 break;
4218 case 9: // 3x3
4219 dst.move(0, Determinant(
4220 mat.Float(0), mat.Float(1), mat.Float(2),
4221 mat.Float(3), mat.Float(4), mat.Float(5),
4222 mat.Float(6), mat.Float(7), mat.Float(8)));
4223 break;
4224 case 16: // 4x4
4225 dst.move(0, Determinant(
4226 mat.Float(0), mat.Float(1), mat.Float(2), mat.Float(3),
4227 mat.Float(4), mat.Float(5), mat.Float(6), mat.Float(7),
4228 mat.Float(8), mat.Float(9), mat.Float(10), mat.Float(11),
4229 mat.Float(12), mat.Float(13), mat.Float(14), mat.Float(15)));
4230 break;
4231 default:
4232 UNREACHABLE("GLSLstd450Determinant can only operate with square matrices. Got %d elements", int(numComponents));
4233 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004234 break;
4235 }
4236 case GLSLstd450MatrixInverse:
4237 {
Ben Clayton445a44a2019-04-10 16:37:19 -04004238 auto mat = GenericValue(this, routine, insn.word(5));
4239 auto numComponents = getType(mat.type).sizeInComponents;
4240 switch (numComponents)
4241 {
4242 case 4: // 2x2
4243 {
4244 auto inv = MatrixInverse(
4245 mat.Float(0), mat.Float(1),
4246 mat.Float(2), mat.Float(3));
4247 for (uint32_t i = 0; i < inv.size(); i++)
4248 {
4249 dst.move(i, inv[i]);
4250 }
4251 break;
4252 }
4253 case 9: // 3x3
4254 {
4255 auto inv = MatrixInverse(
4256 mat.Float(0), mat.Float(1), mat.Float(2),
4257 mat.Float(3), mat.Float(4), mat.Float(5),
4258 mat.Float(6), mat.Float(7), mat.Float(8));
4259 for (uint32_t i = 0; i < inv.size(); i++)
4260 {
4261 dst.move(i, inv[i]);
4262 }
4263 break;
4264 }
4265 case 16: // 4x4
4266 {
4267 auto inv = MatrixInverse(
4268 mat.Float(0), mat.Float(1), mat.Float(2), mat.Float(3),
4269 mat.Float(4), mat.Float(5), mat.Float(6), mat.Float(7),
4270 mat.Float(8), mat.Float(9), mat.Float(10), mat.Float(11),
4271 mat.Float(12), mat.Float(13), mat.Float(14), mat.Float(15));
4272 for (uint32_t i = 0; i < inv.size(); i++)
4273 {
4274 dst.move(i, inv[i]);
4275 }
4276 break;
4277 }
4278 default:
4279 UNREACHABLE("GLSLstd450MatrixInverse can only operate with square matrices. Got %d elements", int(numComponents));
4280 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004281 break;
4282 }
4283 case GLSLstd450IMix:
4284 {
Ben Clayton238fec32019-04-09 16:27:56 -04004285 UNREACHABLE("GLSLstd450IMix has been removed from the specification");
Ben Clayton91fd0e22019-04-09 15:19:39 -04004286 break;
4287 }
4288 case GLSLstd450PackDouble2x32:
4289 {
Ben Clayton92797c22019-04-25 10:44:03 +01004290 UNSUPPORTED("SPIR-V Float64 Capability (GLSLstd450PackDouble2x32)");
Ben Clayton91fd0e22019-04-09 15:19:39 -04004291 break;
4292 }
4293 case GLSLstd450UnpackDouble2x32:
4294 {
Ben Clayton92797c22019-04-25 10:44:03 +01004295 UNSUPPORTED("SPIR-V Float64 Capability (GLSLstd450UnpackDouble2x32)");
Ben Clayton91fd0e22019-04-09 15:19:39 -04004296 break;
4297 }
4298 case GLSLstd450FindILsb:
4299 {
Ben Clayton3f007c42019-04-10 14:54:23 -04004300 auto val = GenericValue(this, routine, insn.word(5));
4301 for (auto i = 0u; i < type.sizeInComponents; i++)
4302 {
4303 auto v = val.UInt(i);
4304 dst.move(i, Cttz(v, true) | CmpEQ(v, SIMD::UInt(0)));
4305 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004306 break;
4307 }
4308 case GLSLstd450FindSMsb:
4309 {
Ben Clayton60958262019-04-10 14:53:30 -04004310 auto val = GenericValue(this, routine, insn.word(5));
4311 for (auto i = 0u; i < type.sizeInComponents; i++)
4312 {
4313 auto v = val.UInt(i) ^ As<SIMD::UInt>(CmpLT(val.Int(i), SIMD::Int(0)));
4314 dst.move(i, SIMD::UInt(31) - Ctlz(v, false));
4315 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004316 break;
4317 }
4318 case GLSLstd450FindUMsb:
4319 {
Ben Clayton60958262019-04-10 14:53:30 -04004320 auto val = GenericValue(this, routine, insn.word(5));
4321 for (auto i = 0u; i < type.sizeInComponents; i++)
4322 {
4323 dst.move(i, SIMD::UInt(31) - Ctlz(val.UInt(i), false));
4324 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004325 break;
4326 }
4327 case GLSLstd450InterpolateAtCentroid:
4328 {
Ben Clayton92797c22019-04-25 10:44:03 +01004329 UNSUPPORTED("SPIR-V SampleRateShading Capability (GLSLstd450InterpolateAtCentroid)");
Ben Clayton91fd0e22019-04-09 15:19:39 -04004330 break;
4331 }
4332 case GLSLstd450InterpolateAtSample:
4333 {
Ben Clayton92797c22019-04-25 10:44:03 +01004334 UNSUPPORTED("SPIR-V SampleRateShading Capability (GLSLstd450InterpolateAtCentroid)");
Ben Clayton91fd0e22019-04-09 15:19:39 -04004335 break;
4336 }
4337 case GLSLstd450InterpolateAtOffset:
4338 {
Ben Clayton92797c22019-04-25 10:44:03 +01004339 UNSUPPORTED("SPIR-V SampleRateShading Capability (GLSLstd450InterpolateAtCentroid)");
Ben Clayton91fd0e22019-04-09 15:19:39 -04004340 break;
4341 }
4342 case GLSLstd450NMin:
4343 {
Ben Claytonee10bcf2019-04-09 17:01:01 -04004344 auto x = GenericValue(this, routine, insn.word(5));
4345 auto y = GenericValue(this, routine, insn.word(6));
4346 for (auto i = 0u; i < type.sizeInComponents; i++)
4347 {
4348 dst.move(i, NMin(x.Float(i), y.Float(i)));
4349 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004350 break;
4351 }
4352 case GLSLstd450NMax:
4353 {
Ben Clayton02de7e02019-04-09 17:01:26 -04004354 auto x = GenericValue(this, routine, insn.word(5));
4355 auto y = GenericValue(this, routine, insn.word(6));
4356 for (auto i = 0u; i < type.sizeInComponents; i++)
4357 {
4358 dst.move(i, NMax(x.Float(i), y.Float(i)));
4359 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004360 break;
4361 }
4362 case GLSLstd450NClamp:
4363 {
Ben Clayton4d633122019-04-09 17:02:34 -04004364 auto x = GenericValue(this, routine, insn.word(5));
4365 auto minVal = GenericValue(this, routine, insn.word(6));
4366 auto maxVal = GenericValue(this, routine, insn.word(7));
4367 for (auto i = 0u; i < type.sizeInComponents; i++)
4368 {
4369 auto clamp = NMin(NMax(x.Float(i), minVal.Float(i)), maxVal.Float(i));
4370 dst.move(i, clamp);
4371 }
Ben Clayton91fd0e22019-04-09 15:19:39 -04004372 break;
4373 }
Chris Forbes9667a5b2019-03-07 09:26:48 -08004374 default:
Ben Clayton92797c22019-04-25 10:44:03 +01004375 UNREACHABLE("ExtInst %d", int(extInstIndex));
Ben Clayton91fd0e22019-04-09 15:19:39 -04004376 break;
Chris Forbes9667a5b2019-03-07 09:26:48 -08004377 }
Ben Claytonc0cf68b2019-03-21 17:46:08 +00004378
4379 return EmitResult::Continue;
Chris Forbes9667a5b2019-03-07 09:26:48 -08004380 }
4381
Nicolas Capens86509d92019-03-21 13:23:50 -04004382 std::memory_order SpirvShader::MemoryOrder(spv::MemorySemanticsMask memorySemantics)
4383 {
Ben Claytonb16c5862019-05-08 14:01:38 +01004384 auto control = static_cast<uint32_t>(memorySemantics) & static_cast<uint32_t>(
4385 spv::MemorySemanticsAcquireMask |
4386 spv::MemorySemanticsReleaseMask |
4387 spv::MemorySemanticsAcquireReleaseMask |
4388 spv::MemorySemanticsSequentiallyConsistentMask
4389 );
4390 switch (control)
Nicolas Capens86509d92019-03-21 13:23:50 -04004391 {
4392 case spv::MemorySemanticsMaskNone: return std::memory_order_relaxed;
4393 case spv::MemorySemanticsAcquireMask: return std::memory_order_acquire;
4394 case spv::MemorySemanticsReleaseMask: return std::memory_order_release;
4395 case spv::MemorySemanticsAcquireReleaseMask: return std::memory_order_acq_rel;
4396 case spv::MemorySemanticsSequentiallyConsistentMask: return std::memory_order_acq_rel; // Vulkan 1.1: "SequentiallyConsistent is treated as AcquireRelease"
4397 default:
Ben Claytonb16c5862019-05-08 14:01:38 +01004398 // "it is invalid for more than one of these four bits to be set:
4399 // Acquire, Release, AcquireRelease, or SequentiallyConsistent."
4400 UNREACHABLE("MemorySemanticsMask: %x", int(control));
Nicolas Capens86509d92019-03-21 13:23:50 -04004401 return std::memory_order_acq_rel;
4402 }
4403 }
4404
Chris Forbes868ed902019-03-13 17:39:45 -07004405 SIMD::Float SpirvShader::Dot(unsigned numComponents, GenericValue const & x, GenericValue const & y) const
4406 {
Ben Clayton9b62c5e2019-03-08 09:32:34 +00004407 SIMD::Float d = x.Float(0) * y.Float(0);
Chris Forbes868ed902019-03-13 17:39:45 -07004408
4409 for (auto i = 1u; i < numComponents; i++)
4410 {
Ben Clayton9b62c5e2019-03-08 09:32:34 +00004411 d += x.Float(i) * y.Float(i);
Chris Forbes868ed902019-03-13 17:39:45 -07004412 }
4413
4414 return d;
4415 }
4416
Chris Forbes50e64932019-04-08 17:49:27 -07004417 SIMD::UInt SpirvShader::FloatToHalfBits(SIMD::UInt floatBits, bool storeInUpperBits) const
4418 {
4419 static const uint32_t mask_sign = 0x80000000u;
4420 static const uint32_t mask_round = ~0xfffu;
4421 static const uint32_t c_f32infty = 255 << 23;
4422 static const uint32_t c_magic = 15 << 23;
4423 static const uint32_t c_nanbit = 0x200;
4424 static const uint32_t c_infty_as_fp16 = 0x7c00;
4425 static const uint32_t c_clamp = (31 << 23) - 0x1000;
4426
4427 SIMD::UInt justsign = SIMD::UInt(mask_sign) & floatBits;
4428 SIMD::UInt absf = floatBits ^ justsign;
4429 SIMD::UInt b_isnormal = CmpNLE(SIMD::UInt(c_f32infty), absf);
4430
4431 // Note: this version doesn't round to the nearest even in case of a tie as defined by IEEE 754-2008, it rounds to +inf
4432 // instead of nearest even, since that's fine for GLSL ES 3.0's needs (see section 2.1.1 Floating-Point Computation)
4433 SIMD::UInt joined = ((((As<SIMD::UInt>(Min(As<SIMD::Float>(absf & SIMD::UInt(mask_round)) * As<SIMD::Float>(SIMD::UInt(c_magic)),
4434 As<SIMD::Float>(SIMD::UInt(c_clamp))))) - SIMD::UInt(mask_round)) >> 13) & b_isnormal) |
4435 ((b_isnormal ^ SIMD::UInt(0xFFFFFFFF)) & ((CmpNLE(absf, SIMD::UInt(c_f32infty)) & SIMD::UInt(c_nanbit)) |
4436 SIMD::UInt(c_infty_as_fp16)));
4437
4438 return storeInUpperBits ? ((joined << 16) | justsign) : joined | (justsign >> 16);
4439 }
4440
Ben Claytonfc77af12019-04-09 10:48:00 -04004441 std::pair<SIMD::Float, SIMD::Int> SpirvShader::Frexp(RValue<SIMD::Float> val) const
4442 {
4443 // Assumes IEEE 754
4444 auto v = As<SIMD::UInt>(val);
4445 auto isNotZero = CmpNEQ(v & SIMD::UInt(0x7FFFFFFF), SIMD::UInt(0));
4446 auto zeroSign = v & SIMD::UInt(0x80000000) & ~isNotZero;
Ben Claytonf41ca6b2019-04-10 22:33:00 +01004447 auto significand = As<SIMD::Float>((((v & SIMD::UInt(0x807FFFFF)) | SIMD::UInt(0x3F000000)) & isNotZero) | zeroSign);
Ben Clayton20f6ba82019-04-09 12:07:29 -04004448 auto exponent = Exponent(val) & SIMD::Int(isNotZero);
Ben Claytonfc77af12019-04-09 10:48:00 -04004449 return std::make_pair(significand, exponent);
4450 }
4451
Ben Claytonc0cf68b2019-03-21 17:46:08 +00004452 SpirvShader::EmitResult SpirvShader::EmitAny(InsnIterator insn, EmitState *state) const
Chris Forbes0785f692019-03-08 09:09:18 -08004453 {
Ben Claytonc0cf68b2019-03-21 17:46:08 +00004454 auto routine = state->routine;
Chris Forbes0785f692019-03-08 09:09:18 -08004455 auto &type = getType(insn.word(1));
Ben Claytonaf26cfe2019-03-21 17:32:44 +00004456 ASSERT(type.sizeInComponents == 1);
Chris Forbes0785f692019-03-08 09:09:18 -08004457 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
4458 auto &srcType = getType(getObject(insn.word(3)).type);
4459 auto src = GenericValue(this, routine, insn.word(3));
4460
Ben Clayton9b62c5e2019-03-08 09:32:34 +00004461 SIMD::UInt result = src.UInt(0);
Chris Forbes0785f692019-03-08 09:09:18 -08004462
4463 for (auto i = 1u; i < srcType.sizeInComponents; i++)
4464 {
Ben Clayton9b62c5e2019-03-08 09:32:34 +00004465 result |= src.UInt(i);
Chris Forbes0785f692019-03-08 09:09:18 -08004466 }
4467
Nicolas Capens80c796b2019-03-19 21:38:44 -04004468 dst.move(0, result);
Ben Claytonc0cf68b2019-03-21 17:46:08 +00004469 return EmitResult::Continue;
Chris Forbes0785f692019-03-08 09:09:18 -08004470 }
4471
Ben Claytonc0cf68b2019-03-21 17:46:08 +00004472 SpirvShader::EmitResult SpirvShader::EmitAll(InsnIterator insn, EmitState *state) const
Chris Forbes0785f692019-03-08 09:09:18 -08004473 {
Ben Claytonc0cf68b2019-03-21 17:46:08 +00004474 auto routine = state->routine;
Chris Forbes0785f692019-03-08 09:09:18 -08004475 auto &type = getType(insn.word(1));
Ben Claytonaf26cfe2019-03-21 17:32:44 +00004476 ASSERT(type.sizeInComponents == 1);
Chris Forbes0785f692019-03-08 09:09:18 -08004477 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
4478 auto &srcType = getType(getObject(insn.word(3)).type);
4479 auto src = GenericValue(this, routine, insn.word(3));
4480
Ben Clayton9b62c5e2019-03-08 09:32:34 +00004481 SIMD::UInt result = src.UInt(0);
Chris Forbes0785f692019-03-08 09:09:18 -08004482
4483 for (auto i = 1u; i < srcType.sizeInComponents; i++)
4484 {
Ben Clayton9b62c5e2019-03-08 09:32:34 +00004485 result &= src.UInt(i);
Chris Forbes0785f692019-03-08 09:09:18 -08004486 }
4487
Nicolas Capens80c796b2019-03-19 21:38:44 -04004488 dst.move(0, result);
Ben Claytonc0cf68b2019-03-21 17:46:08 +00004489 return EmitResult::Continue;
Chris Forbes0785f692019-03-08 09:09:18 -08004490 }
4491
Ben Claytonc0cf68b2019-03-21 17:46:08 +00004492 SpirvShader::EmitResult SpirvShader::EmitBranch(InsnIterator insn, EmitState *state) const
Ben Claytone37ce612019-03-13 19:57:42 +00004493 {
Ben Claytonc0cf68b2019-03-21 17:46:08 +00004494 auto target = Block::ID(insn.word(1));
4495 auto edge = Block::Edge{state->currentBlock, target};
4496 state->edgeActiveLaneMasks.emplace(edge, state->activeLaneMask());
4497 return EmitResult::Terminator;
Ben Claytone37ce612019-03-13 19:57:42 +00004498 }
4499
Ben Clayton9fd02e02019-03-21 18:47:15 +00004500 SpirvShader::EmitResult SpirvShader::EmitBranchConditional(InsnIterator insn, EmitState *state) const
4501 {
4502 auto block = getBlock(state->currentBlock);
4503 ASSERT(block.branchInstruction == insn);
4504
4505 auto condId = Object::ID(block.branchInstruction.word(1));
4506 auto trueBlockId = Block::ID(block.branchInstruction.word(2));
4507 auto falseBlockId = Block::ID(block.branchInstruction.word(3));
4508
4509 auto cond = GenericValue(this, state->routine, condId);
Ben Clayton16ab9e92019-04-08 10:57:35 -04004510 ASSERT_MSG(getType(cond.type).sizeInComponents == 1, "Condition must be a Boolean type scalar");
Ben Clayton9fd02e02019-03-21 18:47:15 +00004511
4512 // TODO: Optimize for case where all lanes take same path.
4513
4514 state->addOutputActiveLaneMaskEdge(trueBlockId, cond.Int(0));
4515 state->addOutputActiveLaneMaskEdge(falseBlockId, ~cond.Int(0));
4516
4517 return EmitResult::Terminator;
4518 }
4519
Ben Clayton213a8ce2019-03-21 18:57:23 +00004520 SpirvShader::EmitResult SpirvShader::EmitSwitch(InsnIterator insn, EmitState *state) const
4521 {
4522 auto block = getBlock(state->currentBlock);
4523 ASSERT(block.branchInstruction == insn);
4524
4525 auto selId = Object::ID(block.branchInstruction.word(1));
4526
4527 auto sel = GenericValue(this, state->routine, selId);
Ben Clayton16ab9e92019-04-08 10:57:35 -04004528 ASSERT_MSG(getType(sel.type).sizeInComponents == 1, "Selector must be a scalar");
Ben Clayton213a8ce2019-03-21 18:57:23 +00004529
4530 auto numCases = (block.branchInstruction.wordCount() - 3) / 2;
4531
4532 // TODO: Optimize for case where all lanes take same path.
4533
4534 SIMD::Int defaultLaneMask = state->activeLaneMask();
4535
4536 // Gather up the case label matches and calculate defaultLaneMask.
4537 std::vector<RValue<SIMD::Int>> caseLabelMatches;
4538 caseLabelMatches.reserve(numCases);
4539 for (uint32_t i = 0; i < numCases; i++)
4540 {
4541 auto label = block.branchInstruction.word(i * 2 + 3);
4542 auto caseBlockId = Block::ID(block.branchInstruction.word(i * 2 + 4));
4543 auto caseLabelMatch = CmpEQ(sel.Int(0), SIMD::Int(label));
4544 state->addOutputActiveLaneMaskEdge(caseBlockId, caseLabelMatch);
4545 defaultLaneMask &= ~caseLabelMatch;
4546 }
4547
4548 auto defaultBlockId = Block::ID(block.branchInstruction.word(2));
4549 state->addOutputActiveLaneMaskEdge(defaultBlockId, defaultLaneMask);
4550
4551 return EmitResult::Terminator;
4552 }
Ben Clayton9fd02e02019-03-21 18:47:15 +00004553
4554 SpirvShader::EmitResult SpirvShader::EmitUnreachable(InsnIterator insn, EmitState *state) const
4555 {
4556 // TODO: Log something in this case?
4557 state->setActiveLaneMask(SIMD::Int(0));
4558 return EmitResult::Terminator;
4559 }
4560
4561 SpirvShader::EmitResult SpirvShader::EmitReturn(InsnIterator insn, EmitState *state) const
4562 {
4563 state->setActiveLaneMask(SIMD::Int(0));
4564 return EmitResult::Terminator;
4565 }
4566
Chris Forbes97e95892019-04-02 13:37:37 +13004567 SpirvShader::EmitResult SpirvShader::EmitKill(InsnIterator insn, EmitState *state) const
4568 {
4569 state->routine->killMask |= SignMask(state->activeLaneMask());
4570 state->setActiveLaneMask(SIMD::Int(0));
4571 return EmitResult::Terminator;
4572 }
4573
Ben Clayton9fd02e02019-03-21 18:47:15 +00004574 SpirvShader::EmitResult SpirvShader::EmitPhi(InsnIterator insn, EmitState *state) const
4575 {
4576 auto routine = state->routine;
4577 auto typeId = Type::ID(insn.word(1));
4578 auto type = getType(typeId);
4579 auto objectId = Object::ID(insn.word(2));
Ben Clayton5d143aa2019-04-03 13:30:14 +01004580 auto currentBlock = getBlock(state->currentBlock);
Ben Clayton9fd02e02019-03-21 18:47:15 +00004581
Nicolas Capens459453a2019-03-27 15:27:27 -04004582 auto tmp = std::unique_ptr<SIMD::Int[]>(new SIMD::Int[type.sizeInComponents]);
Ben Clayton9fd02e02019-03-21 18:47:15 +00004583
4584 bool first = true;
4585 for (uint32_t w = 3; w < insn.wordCount(); w += 2)
4586 {
4587 auto varId = Object::ID(insn.word(w + 0));
4588 auto blockId = Block::ID(insn.word(w + 1));
4589
Ben Clayton5d143aa2019-04-03 13:30:14 +01004590 if (currentBlock.ins.count(blockId) == 0)
4591 {
4592 continue; // In is unreachable. Ignore.
4593 }
4594
Ben Clayton9fd02e02019-03-21 18:47:15 +00004595 auto in = GenericValue(this, routine, varId);
Ben Claytonfe3f0132019-03-26 11:10:16 +00004596 auto mask = GetActiveLaneMaskEdge(state, blockId, state->currentBlock);
Ben Clayton9fd02e02019-03-21 18:47:15 +00004597
4598 for (uint32_t i = 0; i < type.sizeInComponents; i++)
4599 {
4600 auto inMasked = in.Int(i) & mask;
Nicolas Capens459453a2019-03-27 15:27:27 -04004601 tmp[i] = first ? inMasked : (tmp[i] | inMasked);
Ben Clayton9fd02e02019-03-21 18:47:15 +00004602 }
4603 first = false;
4604 }
4605
Nicolas Capens459453a2019-03-27 15:27:27 -04004606 auto &dst = routine->createIntermediate(objectId, type.sizeInComponents);
4607 for(uint32_t i = 0; i < type.sizeInComponents; i++)
4608 {
4609 dst.move(i, tmp[i]);
4610 }
4611
Ben Clayton9fd02e02019-03-21 18:47:15 +00004612 return EmitResult::Continue;
4613 }
4614
Nicolas Capens5b09dd12019-04-30 01:05:28 -04004615 SpirvShader::EmitResult SpirvShader::EmitImageSampleImplicitLod(Variant variant, InsnIterator insn, EmitState *state) const
Nicolas Capens7d867272019-04-08 22:51:08 -04004616 {
Nicolas Capens5b09dd12019-04-30 01:05:28 -04004617 return EmitImageSample({variant, Implicit}, insn, state);
Nicolas Capens125dba02019-04-24 02:03:22 -04004618 }
4619
Nicolas Capens5b09dd12019-04-30 01:05:28 -04004620 SpirvShader::EmitResult SpirvShader::EmitImageSampleExplicitLod(Variant variant, InsnIterator insn, EmitState *state) const
Nicolas Capens125dba02019-04-24 02:03:22 -04004621 {
Nicolas Capens420d9da2019-04-26 17:44:42 -04004622 uint32_t imageOperands = static_cast<spv::ImageOperandsMask>(insn.word(5));
Nicolas Capens4bade2e2019-04-30 16:21:06 -04004623 imageOperands &= ~spv::ImageOperandsConstOffsetMask; // Dealt with later.
Nicolas Capens78896332019-04-29 16:41:50 -04004624
Nicolas Capens420d9da2019-04-26 17:44:42 -04004625 if((imageOperands & spv::ImageOperandsLodMask) == imageOperands)
4626 {
Nicolas Capens5b09dd12019-04-30 01:05:28 -04004627 return EmitImageSample({variant, Lod}, insn, state);
Nicolas Capens420d9da2019-04-26 17:44:42 -04004628 }
4629 else if((imageOperands & spv::ImageOperandsGradMask) == imageOperands)
4630 {
Nicolas Capens5b09dd12019-04-30 01:05:28 -04004631 return EmitImageSample({variant, Grad}, insn, state);
Nicolas Capens420d9da2019-04-26 17:44:42 -04004632 }
4633 else UNIMPLEMENTED("Image Operands %x", imageOperands);
4634 return EmitResult::Continue;
Nicolas Capens125dba02019-04-24 02:03:22 -04004635 }
4636
Chris Forbescd631592019-04-27 10:37:18 -07004637 SpirvShader::EmitResult SpirvShader::EmitImageFetch(InsnIterator insn, EmitState *state) const
4638 {
Chris Forbes6f1b7652019-04-30 13:01:47 -07004639 return EmitImageSample({None, Fetch}, insn, state);
Chris Forbescd631592019-04-27 10:37:18 -07004640 }
4641
Nicolas Capens78896332019-04-29 16:41:50 -04004642 SpirvShader::EmitResult SpirvShader::EmitImageSample(ImageInstruction instruction, InsnIterator insn, EmitState *state) const
Nicolas Capens125dba02019-04-24 02:03:22 -04004643 {
Nicolas Capens7d867272019-04-08 22:51:08 -04004644 Type::ID resultTypeId = insn.word(1);
4645 Object::ID resultId = insn.word(2);
4646 Object::ID sampledImageId = insn.word(3);
4647 Object::ID coordinateId = insn.word(4);
4648 auto &resultType = getType(resultTypeId);
4649
4650 auto &result = state->routine->createIntermediate(resultId, resultType.sizeInComponents);
Chris Forbesfa82c342019-04-26 16:42:38 -07004651 auto imageDescriptor = state->routine->getPointer(sampledImageId).base; // vk::SampledImageDescriptor*
4652
4653 // If using a separate sampler, look through the OpSampledImage instruction to find the sampler descriptor
4654 auto &sampledImage = getObject(sampledImageId);
4655 auto samplerDescriptor = (sampledImage.opcode() == spv::OpSampledImage) ?
4656 state->routine->getPointer(sampledImage.definition.word(4)).base : imageDescriptor;
4657
Nicolas Capens7d867272019-04-08 22:51:08 -04004658 auto coordinate = GenericValue(this, state->routine, coordinateId);
Nicolas Capens125dba02019-04-24 02:03:22 -04004659 auto &coordinateType = getType(coordinate.type);
Nicolas Capens7d867272019-04-08 22:51:08 -04004660
Nicolas Capens97da7822019-04-30 17:33:26 -04004661 Pointer<Byte> sampler = samplerDescriptor + OFFSET(vk::SampledImageDescriptor, sampler); // vk::Sampler*
Nicolas Capens97da7822019-04-30 17:33:26 -04004662 Pointer<Byte> texture = imageDescriptor + OFFSET(vk::SampledImageDescriptor, texture); // sw::Texture*
Nicolas Capens7d867272019-04-08 22:51:08 -04004663
Nicolas Capens125dba02019-04-24 02:03:22 -04004664 uint32_t imageOperands = spv::ImageOperandsMaskNone;
Nicolas Capens4bade2e2019-04-30 16:21:06 -04004665 bool lodOrBias = false;
4666 Object::ID lodOrBiasId = 0;
Nicolas Capens125dba02019-04-24 02:03:22 -04004667 bool grad = false;
Nicolas Capens420d9da2019-04-26 17:44:42 -04004668 Object::ID gradDxId = 0;
4669 Object::ID gradDyId = 0;
Nicolas Capens125dba02019-04-24 02:03:22 -04004670 bool constOffset = false;
Nicolas Capens022bd572019-04-29 23:45:25 -04004671 Object::ID offsetId = 0;
Nicolas Capens125dba02019-04-24 02:03:22 -04004672 bool sample = false;
4673
Chris Forbesc71c17f2019-05-04 10:01:04 -07004674 uint32_t operand = instruction.isDref() ? 6 : 5;
4675
4676 if(insn.wordCount() > operand)
Nicolas Capens125dba02019-04-24 02:03:22 -04004677 {
Chris Forbesc71c17f2019-05-04 10:01:04 -07004678 imageOperands = static_cast<spv::ImageOperandsMask>(insn.word(operand++));
Nicolas Capens125dba02019-04-24 02:03:22 -04004679
4680 if(imageOperands & spv::ImageOperandsBiasMask)
4681 {
Nicolas Capens4bade2e2019-04-30 16:21:06 -04004682 lodOrBias = true;
4683 lodOrBiasId = insn.word(operand);
4684 operand++;
Nicolas Capens125dba02019-04-24 02:03:22 -04004685 imageOperands &= ~spv::ImageOperandsBiasMask;
Nicolas Capens4bade2e2019-04-30 16:21:06 -04004686
4687 ASSERT(instruction.samplerMethod == Implicit);
4688 instruction.samplerMethod = Bias;
Nicolas Capens125dba02019-04-24 02:03:22 -04004689 }
4690
4691 if(imageOperands & spv::ImageOperandsLodMask)
4692 {
Nicolas Capens4bade2e2019-04-30 16:21:06 -04004693 lodOrBias = true;
4694 lodOrBiasId = insn.word(operand);
Nicolas Capens125dba02019-04-24 02:03:22 -04004695 operand++;
4696 imageOperands &= ~spv::ImageOperandsLodMask;
4697 }
4698
4699 if(imageOperands & spv::ImageOperandsGradMask)
4700 {
Nicolas Capens4bade2e2019-04-30 16:21:06 -04004701 ASSERT(!lodOrBias); // SPIR-V 1.3: "It is invalid to set both the Lod and Grad bits." Bias is for ImplicitLod, Grad for ExplicitLod.
Nicolas Capens125dba02019-04-24 02:03:22 -04004702 grad = true;
Nicolas Capens420d9da2019-04-26 17:44:42 -04004703 gradDxId = insn.word(operand + 0);
4704 gradDyId = insn.word(operand + 1);
4705 operand += 2;
Nicolas Capens125dba02019-04-24 02:03:22 -04004706 imageOperands &= ~spv::ImageOperandsGradMask;
4707 }
4708
4709 if(imageOperands & spv::ImageOperandsConstOffsetMask)
4710 {
Nicolas Capens125dba02019-04-24 02:03:22 -04004711 constOffset = true;
Nicolas Capens022bd572019-04-29 23:45:25 -04004712 offsetId = insn.word(operand);
4713 operand++;
Nicolas Capens125dba02019-04-24 02:03:22 -04004714 imageOperands &= ~spv::ImageOperandsConstOffsetMask;
4715 }
4716
4717 if(imageOperands & spv::ImageOperandsSampleMask)
4718 {
4719 UNIMPLEMENTED("Image operand %x", spv::ImageOperandsSampleMask); (void)sample;
4720 sample = true;
4721 imageOperands &= ~spv::ImageOperandsSampleMask;
4722 }
4723
4724 if(imageOperands != 0)
4725 {
4726 UNIMPLEMENTED("Image operand %x", imageOperands);
4727 }
4728 }
4729
Nicolas Capens420d9da2019-04-26 17:44:42 -04004730 Array<SIMD::Float> in(16); // Maximum 16 input parameter components.
Nicolas Capens125dba02019-04-24 02:03:22 -04004731
Nicolas Capens5b09dd12019-04-30 01:05:28 -04004732 uint32_t coordinates = coordinateType.sizeInComponents - instruction.isProj();
4733 instruction.coordinates = coordinates;
4734
Nicolas Capens125dba02019-04-24 02:03:22 -04004735 uint32_t i = 0;
Nicolas Capens5b09dd12019-04-30 01:05:28 -04004736 for( ; i < coordinates; i++)
Nicolas Capens125dba02019-04-24 02:03:22 -04004737 {
Nicolas Capens5b09dd12019-04-30 01:05:28 -04004738 if(instruction.isProj())
4739 {
4740 in[i] = coordinate.Float(i) / coordinate.Float(coordinates); // TODO(b/129523279): Optimize using reciprocal.
4741 }
4742 else
4743 {
4744 in[i] = coordinate.Float(i);
4745 }
4746 }
4747
4748 if(instruction.isDref())
4749 {
Chris Forbesc71c17f2019-05-04 10:01:04 -07004750 auto drefValue = GenericValue(this, state->routine, insn.word(5));
4751 in[i] = drefValue.Float(0);
4752 i++;
Nicolas Capens125dba02019-04-24 02:03:22 -04004753 }
4754
Nicolas Capens4bade2e2019-04-30 16:21:06 -04004755 if(lodOrBias)
Nicolas Capens125dba02019-04-24 02:03:22 -04004756 {
Nicolas Capens4bade2e2019-04-30 16:21:06 -04004757 auto lodValue = GenericValue(this, state->routine, lodOrBiasId);
Nicolas Capens125dba02019-04-24 02:03:22 -04004758 in[i] = lodValue.Float(0);
4759 i++;
4760 }
Nicolas Capens420d9da2019-04-26 17:44:42 -04004761 else if(grad)
4762 {
4763 auto dxValue = GenericValue(this, state->routine, gradDxId);
4764 auto dyValue = GenericValue(this, state->routine, gradDyId);
4765 auto &dxyType = getType(dxValue.type);
4766 ASSERT(dxyType.sizeInComponents == getType(dyValue.type).sizeInComponents);
4767
4768 instruction.gradComponents = dxyType.sizeInComponents;
4769
Nicolas Capens022bd572019-04-29 23:45:25 -04004770 for(uint32_t j = 0; j < dxyType.sizeInComponents; j++, i++)
Nicolas Capens420d9da2019-04-26 17:44:42 -04004771 {
4772 in[i] = dxValue.Float(j);
Nicolas Capens420d9da2019-04-26 17:44:42 -04004773 }
4774
Nicolas Capens022bd572019-04-29 23:45:25 -04004775 for(uint32_t j = 0; j < dxyType.sizeInComponents; j++, i++)
Nicolas Capens420d9da2019-04-26 17:44:42 -04004776 {
4777 in[i] = dyValue.Float(j);
Nicolas Capens022bd572019-04-29 23:45:25 -04004778 }
4779 }
4780
4781 if(constOffset)
4782 {
4783 auto offsetValue = GenericValue(this, state->routine, offsetId);
4784 auto &offsetType = getType(offsetValue.type);
4785
4786 instruction.samplerOption = Offset;
4787 instruction.offsetComponents = offsetType.sizeInComponents;
4788
4789 for(uint32_t j = 0; j < offsetType.sizeInComponents; j++, i++)
4790 {
4791 in[i] = offsetValue.Float(j); // Integer values, but transfered as float.
Nicolas Capens420d9da2019-04-26 17:44:42 -04004792 }
4793 }
4794
Chris Forbes45f9a932019-05-08 13:30:38 -07004795 auto samplerFunc = Call(getImageSampler, instruction.parameters, imageDescriptor, sampler);
Nicolas Capens7d867272019-04-08 22:51:08 -04004796
Ben Clayton96fbe082019-04-16 19:28:11 -04004797 Array<SIMD::Float> out(4);
Nicolas Capens97da7822019-04-30 17:33:26 -04004798 Call<ImageSampler>(samplerFunc, texture, sampler, &in[0], &out[0], state->routine->constants);
Nicolas Capens7d867272019-04-08 22:51:08 -04004799
Chris Forbesc71c17f2019-05-04 10:01:04 -07004800 for (auto i = 0u; i < resultType.sizeInComponents; i++) { result.move(i, out[i]); }
Nicolas Capens7d867272019-04-08 22:51:08 -04004801
4802 return EmitResult::Continue;
4803 }
4804
Chris Forbesb0d00ea2019-04-17 20:24:20 -07004805 SpirvShader::EmitResult SpirvShader::EmitImageQuerySize(InsnIterator insn, EmitState *state) const
4806 {
Ben Clayton0264d8e2019-05-08 15:39:40 +01004807 auto &resultTy = getType(Type::ID(insn.word(1)));
4808 auto resultId = Object::ID(insn.word(2));
Chris Forbesb0d00ea2019-04-17 20:24:20 -07004809 auto imageId = Object::ID(insn.word(3));
Ben Clayton0264d8e2019-05-08 15:39:40 +01004810 auto lodId = Object::ID(0);
4811
4812 auto &dst = state->routine->createIntermediate(resultId, resultTy.sizeInComponents);
4813 GetImageDimensions(state->routine, resultTy, imageId, lodId, dst);
4814
4815 return EmitResult::Continue;
4816 }
4817
4818 SpirvShader::EmitResult SpirvShader::EmitImageQuerySizeLod(InsnIterator insn, EmitState *state) const
4819 {
4820 auto &resultTy = getType(Type::ID(insn.word(1)));
4821 auto resultId = Object::ID(insn.word(2));
4822 auto imageId = Object::ID(insn.word(3));
4823 auto lodId = Object::ID(insn.word(4));
4824
4825 auto &dst = state->routine->createIntermediate(resultId, resultTy.sizeInComponents);
4826 GetImageDimensions(state->routine, resultTy, imageId, lodId, dst);
4827
4828 return EmitResult::Continue;
4829 }
4830
4831 void SpirvShader::GetImageDimensions(SpirvRoutine const *routine, Type const &resultTy, Object::ID imageId, Object::ID lodId, Intermediate &dst) const
4832 {
Chris Forbesb0d00ea2019-04-17 20:24:20 -07004833 auto &image = getObject(imageId);
4834 auto &imageType = getType(image.type);
Chris Forbesb0d00ea2019-04-17 20:24:20 -07004835
4836 ASSERT(imageType.definition.opcode() == spv::OpTypeImage);
4837 bool isArrayed = imageType.definition.word(5) != 0;
4838 bool isCubeMap = imageType.definition.word(3) == spv::DimCube;
4839
4840 const DescriptorDecorations &d = descriptorDecorations.at(imageId);
Ben Clayton0264d8e2019-05-08 15:39:40 +01004841 auto setLayout = routine->pipelineLayout->getDescriptorSetLayout(d.DescriptorSet);
Chris Forbesb0d00ea2019-04-17 20:24:20 -07004842 auto &bindingLayout = setLayout->getBindingLayout(d.Binding);
4843
Ben Clayton0264d8e2019-05-08 15:39:40 +01004844 Pointer<Byte> descriptor = routine->getPointer(imageId).base;
Chris Forbesb0d00ea2019-04-17 20:24:20 -07004845
Ben Clayton0264d8e2019-05-08 15:39:40 +01004846 Pointer<Int> extent;
4847 Int arrayLayers;
Chris Forbesb0d00ea2019-04-17 20:24:20 -07004848
4849 switch (bindingLayout.descriptorType)
4850 {
4851 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
4852 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
4853 {
Ben Clayton0264d8e2019-05-08 15:39:40 +01004854 extent = descriptor + OFFSET(vk::StorageImageDescriptor, extent); // int[3]*
4855 arrayLayers = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, arrayLayers)); // uint32_t
4856 break;
4857 }
4858 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
Chris Forbese2285022019-05-08 16:09:53 -07004859 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
4860 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
Ben Clayton0264d8e2019-05-08 15:39:40 +01004861 {
4862 extent = descriptor + OFFSET(vk::SampledImageDescriptor, extent); // int[3]*
4863 arrayLayers = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, arrayLayers)); // uint32_t
Chris Forbesb0d00ea2019-04-17 20:24:20 -07004864 break;
4865 }
4866 default:
Ben Clayton92797c22019-04-25 10:44:03 +01004867 UNREACHABLE("Image descriptorType: %d", int(bindingLayout.descriptorType));
Chris Forbesb0d00ea2019-04-17 20:24:20 -07004868 }
4869
Ben Clayton0264d8e2019-05-08 15:39:40 +01004870 auto dimensions = resultTy.sizeInComponents - (isArrayed ? 1 : 0);
4871 std::vector<Int> out;
4872 if (lodId != 0)
4873 {
4874 auto lodVal = GenericValue(this, routine, lodId);
4875 ASSERT(getType(lodVal.type).sizeInComponents == 1);
4876 auto lod = lodVal.Int(0);
4877 auto one = SIMD::Int(1);
4878 for (uint32_t i = 0; i < dimensions; i++)
4879 {
4880 dst.move(i, Max(SIMD::Int(extent[i]) >> lod, one));
4881 }
4882 }
4883 else
4884 {
4885 for (uint32_t i = 0; i < dimensions; i++)
4886 {
4887 dst.move(i, SIMD::Int(extent[i]));
4888 }
4889 }
4890
4891 if (isArrayed)
4892 {
4893 auto numElements = isCubeMap ? (arrayLayers / 6) : RValue<Int>(arrayLayers);
4894 dst.move(dimensions, SIMD::Int(numElements));
4895 }
Chris Forbesb0d00ea2019-04-17 20:24:20 -07004896 }
4897
Chris Forbes011744e2019-05-06 14:21:45 -07004898 SIMD::Pointer SpirvShader::GetTexelAddress(SpirvRoutine const *routine, SIMD::Pointer ptr, GenericValue const & coordinate, Type const & imageType, Pointer<Byte> descriptor, int texelSize, Object::ID sampleId, bool useStencilAspect) const
Chris Forbes89c37a42019-04-17 18:28:33 -07004899 {
Chris Forbes89c37a42019-04-17 18:28:33 -07004900 bool isArrayed = imageType.definition.word(5) != 0;
Chris Forbes24466042019-04-22 10:54:23 -07004901 auto dim = static_cast<spv::Dim>(imageType.definition.word(3));
Chris Forbes89c37a42019-04-17 18:28:33 -07004902 int dims = getType(coordinate.type).sizeInComponents - (isArrayed ? 1 : 0);
4903
Chris Forbes24466042019-04-22 10:54:23 -07004904 SIMD::Int u = coordinate.Int(0);
4905 SIMD::Int v = (getType(coordinate.type).sizeInComponents > 1) ? coordinate.Int(1) : RValue<SIMD::Int>(0);
4906 if (dim == spv::DimSubpassData)
4907 {
4908 u += routine->windowSpacePosition[0];
4909 v += routine->windowSpacePosition[1];
4910 }
4911
Chris Forbes011744e2019-05-06 14:21:45 -07004912 if (useStencilAspect)
4913 {
4914 // Adjust addressing for quad layout. Pitches are already correct for the stencil aspect.
4915 // In the quad-layout block, pixel order is [x0,y0 x1,y0 x0,y1 x1,y1]
4916 u = ((v & SIMD::Int(1)) << 1) | ((u << 1) - (u & SIMD::Int(1)));
4917 v &= SIMD::Int(~1);
4918 }
4919
4920 auto rowPitch = SIMD::Int(*Pointer<Int>(descriptor + (useStencilAspect
4921 ? OFFSET(vk::StorageImageDescriptor, stencilRowPitchBytes)
4922 : OFFSET(vk::StorageImageDescriptor, rowPitchBytes))));
4923 auto slicePitch = SIMD::Int(
4924 *Pointer<Int>(descriptor + (useStencilAspect
4925 ? OFFSET(vk::StorageImageDescriptor, stencilSlicePitchBytes)
4926 : OFFSET(vk::StorageImageDescriptor, slicePitchBytes))));
4927 auto samplePitch = SIMD::Int(
4928 *Pointer<Int>(descriptor + (useStencilAspect
4929 ? OFFSET(vk::StorageImageDescriptor, stencilSamplePitchBytes)
4930 : OFFSET(vk::StorageImageDescriptor, samplePitchBytes))));
4931
Chris Forbes24466042019-04-22 10:54:23 -07004932 ptr += u * SIMD::Int(texelSize);
Chris Forbes89c37a42019-04-17 18:28:33 -07004933 if (dims > 1)
4934 {
Chris Forbes011744e2019-05-06 14:21:45 -07004935 ptr += v * rowPitch;
Chris Forbes89c37a42019-04-17 18:28:33 -07004936 }
4937 if (dims > 2)
4938 {
Chris Forbes011744e2019-05-06 14:21:45 -07004939 ptr += coordinate.Int(2) * slicePitch;
Chris Forbes89c37a42019-04-17 18:28:33 -07004940 }
4941 if (isArrayed)
4942 {
Chris Forbes011744e2019-05-06 14:21:45 -07004943 ptr += coordinate.Int(dims) * slicePitch;
Chris Forbes89c37a42019-04-17 18:28:33 -07004944 }
4945
Chris Forbes52a3bba2019-05-03 15:11:41 -07004946 if (sampleId.value())
4947 {
4948 GenericValue sample{this, routine, sampleId};
Chris Forbes011744e2019-05-06 14:21:45 -07004949 ptr += sample.Int(0) * samplePitch;
Chris Forbes52a3bba2019-05-03 15:11:41 -07004950 }
4951
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04004952 return ptr;
Chris Forbes89c37a42019-04-17 18:28:33 -07004953 }
4954
Ben Claytonecfeede2019-05-08 08:51:01 +01004955 void SpirvShader::Yield(YieldResult res) const
4956 {
4957 rr::Yield(RValue<Int>(int(res)));
4958 }
4959
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07004960 SpirvShader::EmitResult SpirvShader::EmitImageRead(InsnIterator insn, EmitState *state) const
4961 {
4962 auto &resultType = getType(Type::ID(insn.word(1)));
4963 auto imageId = Object::ID(insn.word(3));
4964 auto &image = getObject(imageId);
4965 auto &imageType = getType(image.type);
4966 Object::ID resultId = insn.word(2);
4967
Chris Forbes52a3bba2019-05-03 15:11:41 -07004968 Object::ID sampleId = 0;
4969
4970 if (insn.wordCount() > 5)
4971 {
4972 int operand = 6;
4973 auto imageOperands = insn.word(5);
4974 if (imageOperands & spv::ImageOperandsSampleMask)
4975 {
4976 sampleId = insn.word(operand++);
4977 imageOperands &= ~spv::ImageOperandsSampleMask;
4978 }
4979
4980 // Should be no remaining image operands.
4981 ASSERT(!imageOperands);
4982 }
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07004983
4984 ASSERT(imageType.definition.opcode() == spv::OpTypeImage);
Chris Forbes24466042019-04-22 10:54:23 -07004985 auto dim = static_cast<spv::Dim>(imageType.definition.word(3));
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07004986
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07004987 auto coordinate = GenericValue(this, state->routine, insn.word(4));
Chris Forbes24466042019-04-22 10:54:23 -07004988 const DescriptorDecorations &d = descriptorDecorations.at(imageId);
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07004989
Chris Forbes24466042019-04-22 10:54:23 -07004990 // For subpass data, format in the instruction is spv::ImageFormatUnknown. Get it from
4991 // the renderpass data instead. In all other cases, we can use the format in the instruction.
4992 auto vkFormat = (dim == spv::DimSubpassData)
4993 ? inputAttachmentFormats[d.InputAttachmentIndex]
4994 : SpirvFormatToVulkanFormat(static_cast<spv::ImageFormat>(imageType.definition.word(8)));
Chris Forbes011744e2019-05-06 14:21:45 -07004995
4996 // Depth+Stencil image attachments select aspect based on the Sampled Type of the
4997 // OpTypeImage. If float, then we want the depth aspect. If int, we want the stencil aspect.
4998 auto useStencilAspect = (vkFormat == VK_FORMAT_D32_SFLOAT_S8_UINT &&
4999 getType(imageType.definition.word(2)).opcode() == spv::OpTypeInt);
5000
5001 if (useStencilAspect)
5002 {
5003 vkFormat = VK_FORMAT_S8_UINT;
5004 }
5005
5006 auto pointer = state->routine->getPointer(imageId);
5007 Pointer<Byte> binding = pointer.base;
5008 Pointer<Byte> imageBase = *Pointer<Pointer<Byte>>(binding + (useStencilAspect
5009 ? OFFSET(vk::StorageImageDescriptor, stencilPtr)
5010 : OFFSET(vk::StorageImageDescriptor, ptr)));
5011
5012 auto imageSizeInBytes = *Pointer<Int>(binding + OFFSET(vk::StorageImageDescriptor, sizeInBytes));
5013
5014 auto &dst = state->routine->createIntermediate(resultId, resultType.sizeInComponents);
5015
Chris Forbes24466042019-04-22 10:54:23 -07005016 auto texelSize = vk::Format(vkFormat).bytes();
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04005017 auto basePtr = SIMD::Pointer(imageBase, imageSizeInBytes);
Chris Forbes011744e2019-05-06 14:21:45 -07005018 auto texelPtr = GetTexelAddress(state->routine, basePtr, coordinate, imageType, binding, texelSize, sampleId, useStencilAspect);
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07005019
Chris Forbes24466042019-04-22 10:54:23 -07005020 SIMD::Int packed[4];
Chris Forbesa5f4eb62019-04-22 17:46:20 -07005021 // Round up texel size: for formats smaller than 32 bits per texel, we will emit a bunch
5022 // of (overlapping) 32b loads here, and each lane will pick out what it needs from the low bits.
5023 // TODO: specialize for small formats?
5024 for (auto i = 0; i < (texelSize + 3)/4; i++)
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07005025 {
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04005026 packed[i] = SIMD::Load<SIMD::Int>(texelPtr, state->activeLaneMask());
5027 texelPtr += sizeof(float);
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07005028 }
5029
Chris Forbesa32d6302019-04-26 14:19:04 -07005030 // Format support requirements here come from two sources:
5031 // - Minimum required set of formats for loads from storage images
5032 // - Any format supported as a color or depth/stencil attachment, for input attachments
Chris Forbes24466042019-04-22 10:54:23 -07005033 switch(vkFormat)
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07005034 {
Chris Forbes24466042019-04-22 10:54:23 -07005035 case VK_FORMAT_R32G32B32A32_SFLOAT:
5036 case VK_FORMAT_R32G32B32A32_SINT:
5037 case VK_FORMAT_R32G32B32A32_UINT:
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07005038 dst.move(0, packed[0]);
5039 dst.move(1, packed[1]);
5040 dst.move(2, packed[2]);
5041 dst.move(3, packed[3]);
5042 break;
Chris Forbes24466042019-04-22 10:54:23 -07005043 case VK_FORMAT_R32_SINT:
5044 case VK_FORMAT_R32_UINT:
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07005045 dst.move(0, packed[0]);
5046 // Fill remaining channels with 0,0,1 (of the correct type)
5047 dst.move(1, SIMD::Int(0));
5048 dst.move(2, SIMD::Int(0));
5049 dst.move(3, SIMD::Int(1));
5050 break;
Chris Forbes24466042019-04-22 10:54:23 -07005051 case VK_FORMAT_R32_SFLOAT:
Chris Forbesa5f4eb62019-04-22 17:46:20 -07005052 case VK_FORMAT_D32_SFLOAT:
Chris Forbes011744e2019-05-06 14:21:45 -07005053 case VK_FORMAT_D32_SFLOAT_S8_UINT:
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07005054 dst.move(0, packed[0]);
5055 // Fill remaining channels with 0,0,1 (of the correct type)
5056 dst.move(1, SIMD::Float(0));
5057 dst.move(2, SIMD::Float(0));
5058 dst.move(3, SIMD::Float(1));
5059 break;
Chris Forbesa5f4eb62019-04-22 17:46:20 -07005060 case VK_FORMAT_D16_UNORM:
5061 dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xffff)) * SIMD::Float(1.0f / 65535.0f));
5062 dst.move(1, SIMD::Float(0));
5063 dst.move(2, SIMD::Float(0));
5064 dst.move(3, SIMD::Float(1));
5065 break;
Chris Forbes24466042019-04-22 10:54:23 -07005066 case VK_FORMAT_R16G16B16A16_SINT:
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07005067 dst.move(0, (packed[0] << 16) >> 16);
5068 dst.move(1, (packed[0]) >> 16);
5069 dst.move(2, (packed[1] << 16) >> 16);
5070 dst.move(3, (packed[1]) >> 16);
5071 break;
Chris Forbes24466042019-04-22 10:54:23 -07005072 case VK_FORMAT_R16G16B16A16_UINT:
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07005073 dst.move(0, packed[0] & SIMD::Int(0xffff));
5074 dst.move(1, (packed[0] >> 16) & SIMD::Int(0xffff));
5075 dst.move(2, packed[1] & SIMD::Int(0xffff));
5076 dst.move(3, (packed[1] >> 16) & SIMD::Int(0xffff));
5077 break;
Chris Forbes24466042019-04-22 10:54:23 -07005078 case VK_FORMAT_R16G16B16A16_SFLOAT:
Chris Forbesd3546952019-04-30 19:32:19 -07005079 dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
5080 dst.move(1, halfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16));
5081 dst.move(2, halfToFloatBits(As<SIMD::UInt>(packed[1]) & SIMD::UInt(0x0000FFFF)));
5082 dst.move(3, halfToFloatBits((As<SIMD::UInt>(packed[1]) & SIMD::UInt(0xFFFF0000)) >> 16));
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07005083 break;
Chris Forbes24466042019-04-22 10:54:23 -07005084 case VK_FORMAT_R8G8B8A8_SNORM:
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07005085 dst.move(0, Min(Max(SIMD::Float(((packed[0]<<24) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
5086 dst.move(1, Min(Max(SIMD::Float(((packed[0]<<16) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
5087 dst.move(2, Min(Max(SIMD::Float(((packed[0]<<8) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
5088 dst.move(3, Min(Max(SIMD::Float(((packed[0]) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
5089 break;
Chris Forbes24466042019-04-22 10:54:23 -07005090 case VK_FORMAT_R8G8B8A8_UNORM:
Chris Forbesa32d6302019-04-26 14:19:04 -07005091 case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07005092 dst.move(0, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5093 dst.move(1, SIMD::Float(((packed[0]>>8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5094 dst.move(2, SIMD::Float(((packed[0]>>16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5095 dst.move(3, SIMD::Float(((packed[0]>>24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5096 break;
Chris Forbesa32d6302019-04-26 14:19:04 -07005097 case VK_FORMAT_R8G8B8A8_SRGB:
5098 case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
5099 dst.move(0, ::sRGBtoLinear(SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
5100 dst.move(1, ::sRGBtoLinear(SIMD::Float(((packed[0]>>8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
5101 dst.move(2, ::sRGBtoLinear(SIMD::Float(((packed[0]>>16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
5102 dst.move(3, ::sRGBtoLinear(SIMD::Float(((packed[0]>>24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
5103 break;
5104 case VK_FORMAT_B8G8R8A8_UNORM:
5105 dst.move(0, SIMD::Float(((packed[0]>>16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5106 dst.move(1, SIMD::Float(((packed[0]>>8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5107 dst.move(2, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5108 dst.move(3, SIMD::Float(((packed[0]>>24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5109 break;
5110 case VK_FORMAT_B8G8R8A8_SRGB:
5111 dst.move(0, ::sRGBtoLinear(SIMD::Float(((packed[0]>>16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
5112 dst.move(1, ::sRGBtoLinear(SIMD::Float(((packed[0]>>8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
5113 dst.move(2, ::sRGBtoLinear(SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
5114 dst.move(3, ::sRGBtoLinear(SIMD::Float(((packed[0]>>24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
5115 break;
Chris Forbes24466042019-04-22 10:54:23 -07005116 case VK_FORMAT_R8G8B8A8_UINT:
Chris Forbesa32d6302019-04-26 14:19:04 -07005117 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07005118 dst.move(0, (As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF)));
5119 dst.move(1, ((As<SIMD::UInt>(packed[0])>>8) & SIMD::UInt(0xFF)));
5120 dst.move(2, ((As<SIMD::UInt>(packed[0])>>16) & SIMD::UInt(0xFF)));
5121 dst.move(3, ((As<SIMD::UInt>(packed[0])>>24) & SIMD::UInt(0xFF)));
5122 break;
Chris Forbes24466042019-04-22 10:54:23 -07005123 case VK_FORMAT_R8G8B8A8_SINT:
Chris Forbesa32d6302019-04-26 14:19:04 -07005124 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07005125 dst.move(0, (packed[0] << 24) >> 24);
5126 dst.move(1, (packed[0] << 16) >> 24);
5127 dst.move(2, (packed[0] << 8) >> 24);
5128 dst.move(3, (packed[0]) >> 24);
5129 break;
Chris Forbesf5c89362019-04-26 13:41:41 -07005130 case VK_FORMAT_R8_UNORM:
5131 dst.move(0, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5132 dst.move(1, SIMD::Float(0));
5133 dst.move(2, SIMD::Float(0));
5134 dst.move(3, SIMD::Float(1));
5135 break;
5136 case VK_FORMAT_R8_UINT:
Chris Forbes011744e2019-05-06 14:21:45 -07005137 case VK_FORMAT_S8_UINT:
Chris Forbesf5c89362019-04-26 13:41:41 -07005138 dst.move(0, (As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF)));
5139 dst.move(1, SIMD::UInt(0));
5140 dst.move(2, SIMD::UInt(0));
5141 dst.move(3, SIMD::UInt(1));
5142 break;
5143 case VK_FORMAT_R8_SINT:
5144 dst.move(0, (packed[0] << 24) >> 24);
5145 dst.move(1, SIMD::Int(0));
5146 dst.move(2, SIMD::Int(0));
5147 dst.move(3, SIMD::Int(1));
5148 break;
5149 case VK_FORMAT_R8G8_UNORM:
5150 dst.move(0, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5151 dst.move(1, SIMD::Float(((packed[0]>>8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5152 dst.move(2, SIMD::Float(0));
5153 dst.move(3, SIMD::Float(1));
5154 break;
5155 case VK_FORMAT_R8G8_UINT:
5156 dst.move(0, (As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF)));
5157 dst.move(1, ((As<SIMD::UInt>(packed[0])>>8) & SIMD::UInt(0xFF)));
5158 dst.move(2, SIMD::UInt(0));
5159 dst.move(3, SIMD::UInt(1));
5160 break;
5161 case VK_FORMAT_R8G8_SINT:
5162 dst.move(0, (packed[0] << 24) >> 24);
5163 dst.move(1, (packed[0] << 16) >> 24);
5164 dst.move(2, SIMD::Int(0));
5165 dst.move(3, SIMD::Int(1));
5166 break;
Chris Forbesa32d6302019-04-26 14:19:04 -07005167 case VK_FORMAT_R16_SFLOAT:
Chris Forbesd3546952019-04-30 19:32:19 -07005168 dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
Chris Forbesa32d6302019-04-26 14:19:04 -07005169 dst.move(1, SIMD::Float(0));
5170 dst.move(2, SIMD::Float(0));
5171 dst.move(3, SIMD::Float(1));
5172 break;
5173 case VK_FORMAT_R16_UINT:
5174 dst.move(0, packed[0] & SIMD::Int(0xffff));
5175 dst.move(1, SIMD::UInt(0));
5176 dst.move(2, SIMD::UInt(0));
5177 dst.move(3, SIMD::UInt(1));
5178 break;
5179 case VK_FORMAT_R16_SINT:
5180 dst.move(0, (packed[0] << 16) >> 16);
5181 dst.move(1, SIMD::Int(0));
5182 dst.move(2, SIMD::Int(0));
5183 dst.move(3, SIMD::Int(1));
5184 break;
5185 case VK_FORMAT_R16G16_SFLOAT:
Chris Forbesd3546952019-04-30 19:32:19 -07005186 dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
5187 dst.move(1, halfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16));
Chris Forbesa32d6302019-04-26 14:19:04 -07005188 dst.move(2, SIMD::Float(0));
5189 dst.move(3, SIMD::Float(1));
5190 break;
5191 case VK_FORMAT_R16G16_UINT:
5192 dst.move(0, packed[0] & SIMD::Int(0xffff));
5193 dst.move(1, (packed[0] >> 16) & SIMD::Int(0xffff));
5194 dst.move(2, SIMD::UInt(0));
5195 dst.move(3, SIMD::UInt(1));
5196 break;
5197 case VK_FORMAT_R16G16_SINT:
5198 dst.move(0, (packed[0] << 16) >> 16);
5199 dst.move(1, (packed[0]) >> 16);
5200 dst.move(2, SIMD::Int(0));
5201 dst.move(3, SIMD::Int(1));
5202 break;
5203 case VK_FORMAT_R32G32_SINT:
5204 case VK_FORMAT_R32G32_UINT:
5205 dst.move(0, packed[0]);
5206 dst.move(1, packed[1]);
5207 dst.move(2, SIMD::Int(0));
5208 dst.move(3, SIMD::Int(1));
5209 break;
5210 case VK_FORMAT_R32G32_SFLOAT:
5211 dst.move(0, packed[0]);
5212 dst.move(1, packed[1]);
5213 dst.move(2, SIMD::Float(0));
5214 dst.move(3, SIMD::Float(1));
5215 break;
Chris Forbesdcc9fd72019-05-03 07:35:14 -07005216 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
5217 dst.move(0, (packed[0]) & SIMD::Int(0x3FF));
5218 dst.move(1, (packed[0] >> 10) & SIMD::Int(0x3FF));
5219 dst.move(2, (packed[0] >> 20) & SIMD::Int(0x3FF));
5220 dst.move(3, (packed[0] >> 30) & SIMD::Int(0x3));
5221 break;
Chris Forbes8aba20f2019-05-03 09:06:48 -07005222 case VK_FORMAT_R5G6B5_UNORM_PACK16:
5223 dst.move(0, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
5224 dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x3F)) * SIMD::Float(1.0f / 0x3F));
5225 dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
5226 dst.move(3, SIMD::Float(1));
5227 break;
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07005228 default:
Ben Clayton92797c22019-04-25 10:44:03 +01005229 UNIMPLEMENTED("spv::ImageFormat %d", int(vkFormat));
5230 break;
Chris Forbes2f7f2ec2019-04-17 16:58:15 -07005231 }
5232
5233 return EmitResult::Continue;
5234 }
5235
Chris Forbes179f0142019-04-17 20:24:44 -07005236 SpirvShader::EmitResult SpirvShader::EmitImageWrite(InsnIterator insn, EmitState *state) const
5237 {
5238 auto imageId = Object::ID(insn.word(1));
5239 auto &image = getObject(imageId);
5240 auto &imageType = getType(image.type);
5241
5242 ASSERT(imageType.definition.opcode() == spv::OpTypeImage);
5243
5244 // Not handling any image operands yet.
5245 ASSERT(insn.wordCount() == 4);
5246
Chris Forbes179f0142019-04-17 20:24:44 -07005247 auto coordinate = GenericValue(this, state->routine, insn.word(2));
5248 auto texel = GenericValue(this, state->routine, insn.word(3));
5249
Chris Forbes621a7bd2019-04-19 08:28:00 -07005250 Pointer<Byte> binding = state->routine->getPointer(imageId).base;
Chris Forbes179f0142019-04-17 20:24:44 -07005251 Pointer<Byte> imageBase = *Pointer<Pointer<Byte>>(binding + OFFSET(vk::StorageImageDescriptor, ptr));
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04005252 auto imageSizeInBytes = *Pointer<Int>(binding + OFFSET(vk::StorageImageDescriptor, sizeInBytes));
Chris Forbes179f0142019-04-17 20:24:44 -07005253
5254 SIMD::Int packed[4];
5255 auto numPackedElements = 0u;
5256 int texelSize = 0;
5257 auto format = static_cast<spv::ImageFormat>(imageType.definition.word(8));
5258 switch (format)
5259 {
5260 case spv::ImageFormatRgba32f:
5261 case spv::ImageFormatRgba32i:
5262 case spv::ImageFormatRgba32ui:
5263 texelSize = 16;
5264 packed[0] = texel.Int(0);
5265 packed[1] = texel.Int(1);
5266 packed[2] = texel.Int(2);
5267 packed[3] = texel.Int(3);
5268 numPackedElements = 4;
5269 break;
5270 case spv::ImageFormatR32f:
5271 case spv::ImageFormatR32i:
5272 case spv::ImageFormatR32ui:
5273 texelSize = 4;
5274 packed[0] = texel.Int(0);
5275 numPackedElements = 1;
5276 break;
5277 case spv::ImageFormatRgba8:
5278 texelSize = 4;
5279 packed[0] = (SIMD::UInt(Round(Min(Max(texel.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
5280 ((SIMD::UInt(Round(Min(Max(texel.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
5281 ((SIMD::UInt(Round(Min(Max(texel.Float(2), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
5282 ((SIMD::UInt(Round(Min(Max(texel.Float(3), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24);
5283 numPackedElements = 1;
5284 break;
5285 case spv::ImageFormatRgba8Snorm:
5286 texelSize = 4;
5287 packed[0] = (SIMD::Int(Round(Min(Max(texel.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
5288 SIMD::Int(0xFF)) |
5289 ((SIMD::Int(Round(Min(Max(texel.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
5290 SIMD::Int(0xFF)) << 8) |
5291 ((SIMD::Int(Round(Min(Max(texel.Float(2), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
5292 SIMD::Int(0xFF)) << 16) |
5293 ((SIMD::Int(Round(Min(Max(texel.Float(3), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
5294 SIMD::Int(0xFF)) << 24);
5295 numPackedElements = 1;
5296 break;
5297 case spv::ImageFormatRgba8i:
5298 case spv::ImageFormatRgba8ui:
5299 texelSize = 4;
5300 packed[0] = (SIMD::UInt(texel.UInt(0) & SIMD::UInt(0xff))) |
5301 (SIMD::UInt(texel.UInt(1) & SIMD::UInt(0xff)) << 8) |
5302 (SIMD::UInt(texel.UInt(2) & SIMD::UInt(0xff)) << 16) |
5303 (SIMD::UInt(texel.UInt(3) & SIMD::UInt(0xff)) << 24);
5304 numPackedElements = 1;
5305 break;
5306 case spv::ImageFormatRgba16f:
5307 texelSize = 8;
5308 packed[0] = FloatToHalfBits(texel.UInt(0), false) | FloatToHalfBits(texel.UInt(1), true);
5309 packed[1] = FloatToHalfBits(texel.UInt(2), false) | FloatToHalfBits(texel.UInt(3), true);
5310 numPackedElements = 2;
5311 break;
5312 case spv::ImageFormatRgba16i:
5313 case spv::ImageFormatRgba16ui:
5314 texelSize = 8;
5315 packed[0] = SIMD::UInt(texel.UInt(0) & SIMD::UInt(0xffff)) | (SIMD::UInt(texel.UInt(1) & SIMD::UInt(0xffff)) << 16);
5316 packed[1] = SIMD::UInt(texel.UInt(2) & SIMD::UInt(0xffff)) | (SIMD::UInt(texel.UInt(3) & SIMD::UInt(0xffff)) << 16);
5317 numPackedElements = 2;
5318 break;
Ben Clayton92797c22019-04-25 10:44:03 +01005319 case spv::ImageFormatRg32f:
5320 case spv::ImageFormatRg16f:
5321 case spv::ImageFormatR11fG11fB10f:
5322 case spv::ImageFormatR16f:
5323 case spv::ImageFormatRgba16:
5324 case spv::ImageFormatRgb10A2:
5325 case spv::ImageFormatRg16:
5326 case spv::ImageFormatRg8:
5327 case spv::ImageFormatR16:
5328 case spv::ImageFormatR8:
5329 case spv::ImageFormatRgba16Snorm:
5330 case spv::ImageFormatRg16Snorm:
5331 case spv::ImageFormatRg8Snorm:
5332 case spv::ImageFormatR16Snorm:
5333 case spv::ImageFormatR8Snorm:
5334 case spv::ImageFormatRg32i:
5335 case spv::ImageFormatRg16i:
5336 case spv::ImageFormatRg8i:
5337 case spv::ImageFormatR16i:
5338 case spv::ImageFormatR8i:
5339 case spv::ImageFormatRgb10a2ui:
5340 case spv::ImageFormatRg32ui:
5341 case spv::ImageFormatRg16ui:
5342 case spv::ImageFormatRg8ui:
5343 case spv::ImageFormatR16ui:
5344 case spv::ImageFormatR8ui:
5345 UNIMPLEMENTED("spv::ImageFormat %d", int(format));
5346 break;
5347
Chris Forbes179f0142019-04-17 20:24:44 -07005348 default:
Ben Clayton92797c22019-04-25 10:44:03 +01005349 UNREACHABLE("spv::ImageFormat %d", int(format));
5350 break;
Chris Forbes179f0142019-04-17 20:24:44 -07005351 }
5352
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04005353 auto basePtr = SIMD::Pointer(imageBase, imageSizeInBytes);
Chris Forbes011744e2019-05-06 14:21:45 -07005354 auto texelPtr = GetTexelAddress(state->routine, basePtr, coordinate, imageType, binding, texelSize, 0, false);
Chris Forbes179f0142019-04-17 20:24:44 -07005355
5356 for (auto i = 0u; i < numPackedElements; i++)
5357 {
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04005358 SIMD::Store(texelPtr, packed[i], state->activeLaneMask());
5359 texelPtr += sizeof(float);
Chris Forbes179f0142019-04-17 20:24:44 -07005360 }
5361
5362 return EmitResult::Continue;
5363 }
5364
Chris Forbesb51f2c12019-04-18 11:01:30 -07005365 SpirvShader::EmitResult SpirvShader::EmitImageTexelPointer(InsnIterator insn, EmitState *state) const
5366 {
5367 auto &resultType = getType(Type::ID(insn.word(1)));
5368 auto imageId = Object::ID(insn.word(3));
5369 auto &image = getObject(imageId);
5370 // Note: OpImageTexelPointer is unusual in that the image is passed by pointer.
5371 // Look through to get the actual image type.
5372 auto &imageType = getType(getType(image.type).element);
5373 Object::ID resultId = insn.word(2);
5374
5375 ASSERT(imageType.opcode() == spv::OpTypeImage);
5376 ASSERT(resultType.storageClass == spv::StorageClassImage);
5377 ASSERT(getType(resultType.element).opcode() == spv::OpTypeInt);
5378
Chris Forbesb51f2c12019-04-18 11:01:30 -07005379 auto coordinate = GenericValue(this, state->routine, insn.word(4));
5380
Chris Forbes621a7bd2019-04-19 08:28:00 -07005381 Pointer<Byte> binding = state->routine->getPointer(imageId).base;
Chris Forbesb51f2c12019-04-18 11:01:30 -07005382 Pointer<Byte> imageBase = *Pointer<Pointer<Byte>>(binding + OFFSET(vk::StorageImageDescriptor, ptr));
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04005383 auto imageSizeInBytes = *Pointer<Int>(binding + OFFSET(vk::StorageImageDescriptor, sizeInBytes));
Chris Forbesb51f2c12019-04-18 11:01:30 -07005384
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04005385 auto basePtr = SIMD::Pointer(imageBase, imageSizeInBytes);
Chris Forbes011744e2019-05-06 14:21:45 -07005386 auto ptr = GetTexelAddress(state->routine, basePtr, coordinate, imageType, binding, sizeof(uint32_t), 0, false);
Ben Clayton5f7e9112019-04-16 11:03:40 -04005387
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04005388 state->routine->createPointer(resultId, ptr);
Chris Forbesb51f2c12019-04-18 11:01:30 -07005389
5390 return EmitResult::Continue;
5391 }
5392
Chris Forbesfa82c342019-04-26 16:42:38 -07005393 SpirvShader::EmitResult SpirvShader::EmitSampledImageCombineOrSplit(InsnIterator insn, EmitState *state) const
5394 {
5395 // Propagate the image pointer in both cases.
5396 // Consumers of OpSampledImage will look through to find the sampler pointer.
5397
5398 Object::ID resultId = insn.word(2);
5399 Object::ID imageId = insn.word(3);
5400
5401 state->routine->createPointer(resultId, state->routine->getPointer(imageId));
5402
5403 return EmitResult::Continue;
5404 }
5405
Chris Forbes17813932019-04-18 11:45:54 -07005406 SpirvShader::EmitResult SpirvShader::EmitAtomicOp(InsnIterator insn, EmitState *state) const
5407 {
5408 auto &resultType = getType(Type::ID(insn.word(1)));
5409 Object::ID resultId = insn.word(2);
5410 Object::ID semanticsId = insn.word(5);
5411 auto memorySemantics = static_cast<spv::MemorySemanticsMask>(getObject(semanticsId).constantValue[0]);
5412 auto memoryOrder = MemoryOrder(memorySemantics);
Chris Forbes707ed992019-04-18 18:17:35 -07005413 // Where no value is provided (increment/decrement) use an implicit value of 1.
5414 auto value = (insn.wordCount() == 7) ? GenericValue(this, state->routine, insn.word(6)).UInt(0) : RValue<SIMD::UInt>(1);
Chris Forbes17813932019-04-18 11:45:54 -07005415 auto &dst = state->routine->createIntermediate(resultId, resultType.sizeInComponents);
Ben Clayton5f7e9112019-04-16 11:03:40 -04005416 auto ptr = state->routine->getPointer(insn.word(3));
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04005417 auto ptrOffsets = ptr.offsets();
Chris Forbes17813932019-04-18 11:45:54 -07005418
5419 SIMD::UInt x;
5420 for (int j = 0; j < SIMD::Width; j++)
5421 {
5422 If(Extract(state->activeLaneMask(), j) != 0)
5423 {
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04005424 auto offset = Extract(ptrOffsets, j);
Chris Forbes707ed992019-04-18 18:17:35 -07005425 auto laneValue = Extract(value, j);
Chris Forbes17813932019-04-18 11:45:54 -07005426 UInt v;
5427 switch (insn.opcode())
5428 {
5429 case spv::OpAtomicIAdd:
Chris Forbes707ed992019-04-18 18:17:35 -07005430 case spv::OpAtomicIIncrement:
Ben Clayton5f7e9112019-04-16 11:03:40 -04005431 v = AddAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
Chris Forbes17813932019-04-18 11:45:54 -07005432 break;
Chris Forbes707ed992019-04-18 18:17:35 -07005433 case spv::OpAtomicISub:
5434 case spv::OpAtomicIDecrement:
5435 v = SubAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
5436 break;
Chris Forbes17813932019-04-18 11:45:54 -07005437 case spv::OpAtomicAnd:
Ben Clayton5f7e9112019-04-16 11:03:40 -04005438 v = AndAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
Chris Forbes17813932019-04-18 11:45:54 -07005439 break;
5440 case spv::OpAtomicOr:
Ben Clayton5f7e9112019-04-16 11:03:40 -04005441 v = OrAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
Chris Forbes17813932019-04-18 11:45:54 -07005442 break;
5443 case spv::OpAtomicXor:
Ben Clayton5f7e9112019-04-16 11:03:40 -04005444 v = XorAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
Chris Forbes17813932019-04-18 11:45:54 -07005445 break;
5446 case spv::OpAtomicSMin:
Ben Clayton5f7e9112019-04-16 11:03:40 -04005447 v = As<UInt>(MinAtomic(Pointer<Int>(&ptr.base[offset]), As<Int>(laneValue), memoryOrder));
Chris Forbes17813932019-04-18 11:45:54 -07005448 break;
5449 case spv::OpAtomicSMax:
Ben Clayton5f7e9112019-04-16 11:03:40 -04005450 v = As<UInt>(MaxAtomic(Pointer<Int>(&ptr.base[offset]), As<Int>(laneValue), memoryOrder));
Chris Forbes17813932019-04-18 11:45:54 -07005451 break;
5452 case spv::OpAtomicUMin:
Ben Clayton5f7e9112019-04-16 11:03:40 -04005453 v = MinAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
Chris Forbes17813932019-04-18 11:45:54 -07005454 break;
5455 case spv::OpAtomicUMax:
Ben Clayton5f7e9112019-04-16 11:03:40 -04005456 v = MaxAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
Chris Forbes17813932019-04-18 11:45:54 -07005457 break;
5458 case spv::OpAtomicExchange:
Ben Clayton5f7e9112019-04-16 11:03:40 -04005459 v = ExchangeAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
Chris Forbes17813932019-04-18 11:45:54 -07005460 break;
5461 default:
Ben Clayton92797c22019-04-25 10:44:03 +01005462 UNREACHABLE("%s", OpcodeName(insn.opcode()).c_str());
Chris Forbes17813932019-04-18 11:45:54 -07005463 break;
5464 }
5465 x = Insert(x, v, j);
5466 }
5467 }
5468
5469 dst.move(0, x);
5470 return EmitResult::Continue;
5471 }
5472
Chris Forbesa16238d2019-04-18 16:31:54 -07005473 SpirvShader::EmitResult SpirvShader::EmitAtomicCompareExchange(InsnIterator insn, EmitState *state) const
5474 {
5475 // Separate from EmitAtomicOp due to different instruction encoding
5476 auto &resultType = getType(Type::ID(insn.word(1)));
5477 Object::ID resultId = insn.word(2);
5478
5479 auto memorySemanticsEqual = static_cast<spv::MemorySemanticsMask>(getObject(insn.word(5)).constantValue[0]);
5480 auto memoryOrderEqual = MemoryOrder(memorySemanticsEqual);
5481 auto memorySemanticsUnequal = static_cast<spv::MemorySemanticsMask>(getObject(insn.word(6)).constantValue[0]);
5482 auto memoryOrderUnequal = MemoryOrder(memorySemanticsUnequal);
5483
5484 auto value = GenericValue(this, state->routine, insn.word(7));
5485 auto comparator = GenericValue(this, state->routine, insn.word(8));
5486 auto &dst = state->routine->createIntermediate(resultId, resultType.sizeInComponents);
Ben Clayton5f7e9112019-04-16 11:03:40 -04005487 auto ptr = state->routine->getPointer(insn.word(3));
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04005488 auto ptrOffsets = ptr.offsets();
Chris Forbesa16238d2019-04-18 16:31:54 -07005489
5490 SIMD::UInt x;
5491 for (int j = 0; j < SIMD::Width; j++)
5492 {
5493 If(Extract(state->activeLaneMask(), j) != 0)
5494 {
Ben Clayton9e4bc1b2019-04-16 16:52:02 -04005495 auto offset = Extract(ptrOffsets, j);
Chris Forbesa16238d2019-04-18 16:31:54 -07005496 auto laneValue = Extract(value.UInt(0), j);
5497 auto laneComparator = Extract(comparator.UInt(0), j);
Ben Clayton5f7e9112019-04-16 11:03:40 -04005498 UInt v = CompareExchangeAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, laneComparator, memoryOrderEqual, memoryOrderUnequal);
Chris Forbesa16238d2019-04-18 16:31:54 -07005499 x = Insert(x, v, j);
5500 }
5501 }
5502
5503 dst.move(0, x);
5504 return EmitResult::Continue;
5505 }
5506
Ben Clayton78abf372019-05-09 15:11:58 +01005507 SpirvShader::EmitResult SpirvShader::EmitCopyObject(InsnIterator insn, EmitState *state) const
5508 {
5509 auto ty = getType(insn.word(1));
5510 auto &dst = state->routine->createIntermediate(insn.word(2), ty.sizeInComponents);
5511 auto src = GenericValue(this, state->routine, insn.word(3));
5512 for (uint32_t i = 0; i < ty.sizeInComponents; i++)
5513 {
5514 dst.move(i, src.Int(i));
5515 }
5516 return EmitResult::Continue;
5517 }
5518
Ben Claytonb5a45462019-04-30 19:21:29 +01005519 SpirvShader::EmitResult SpirvShader::EmitCopyMemory(InsnIterator insn, EmitState *state) const
5520 {
5521 Object::ID dstPtrId = insn.word(1);
5522 Object::ID srcPtrId = insn.word(2);
5523 auto &dstPtrTy = getType(getObject(dstPtrId).type);
5524 auto &srcPtrTy = getType(getObject(srcPtrId).type);
5525 ASSERT(dstPtrTy.element == srcPtrTy.element);
5526
5527 bool dstInterleavedByLane = IsStorageInterleavedByLane(dstPtrTy.storageClass);
5528 bool srcInterleavedByLane = IsStorageInterleavedByLane(srcPtrTy.storageClass);
5529 auto dstPtr = state->routine->getPointer(dstPtrId);
5530 auto srcPtr = state->routine->getPointer(srcPtrId);
5531
5532 std::unordered_map<uint32_t, uint32_t> srcOffsets;
5533
5534 VisitMemoryObject(srcPtrId, [&](uint32_t i, uint32_t srcOffset) { srcOffsets[i] = srcOffset; });
5535
5536 VisitMemoryObject(dstPtrId, [&](uint32_t i, uint32_t dstOffset)
5537 {
5538 auto it = srcOffsets.find(i);
5539 ASSERT(it != srcOffsets.end());
5540 auto srcOffset = it->second;
5541
5542 auto dst = dstPtr + dstOffset;
5543 auto src = srcPtr + srcOffset;
5544 if (dstInterleavedByLane) { dst = interleaveByLane(dst); }
5545 if (srcInterleavedByLane) { src = interleaveByLane(src); }
5546 SIMD::Store(dst, SIMD::Load<SIMD::Float>(src, state->activeLaneMask()), state->activeLaneMask());
5547 });
5548 return EmitResult::Continue;
5549 }
5550
Ben Claytonecfeede2019-05-08 08:51:01 +01005551 SpirvShader::EmitResult SpirvShader::EmitControlBarrier(InsnIterator insn, EmitState *state) const
5552 {
5553 auto executionScope = spv::Scope(GetConstScalarInt(insn.word(1)));
5554 auto semantics = spv::MemorySemanticsMask(GetConstScalarInt(insn.word(3)));
5555 // TODO: We probably want to consider the memory scope here. For now,
5556 // just always emit the full fence.
5557 Fence(semantics);
5558
5559 switch (executionScope)
5560 {
5561 case spv::ScopeWorkgroup:
5562 case spv::ScopeSubgroup:
5563 Yield(YieldResult::ControlBarrier);
5564 break;
5565 default:
5566 // See Vulkan 1.1 spec, Appendix A, Validation Rules within a Module.
5567 UNREACHABLE("Scope for execution must be limited to Workgroup or Subgroup");
5568 break;
5569 }
5570
5571 return EmitResult::Continue;
5572 }
5573
Ben Claytonb16c5862019-05-08 14:01:38 +01005574 SpirvShader::EmitResult SpirvShader::EmitMemoryBarrier(InsnIterator insn, EmitState *state) const
5575 {
5576 auto semantics = spv::MemorySemanticsMask(GetConstScalarInt(insn.word(2)));
5577 // TODO: We probably want to consider the memory scope here. For now,
5578 // just always emit the full fence.
5579 Fence(semantics);
5580 return EmitResult::Continue;
5581 }
5582
5583 void SpirvShader::Fence(spv::MemorySemanticsMask semantics) const
5584 {
5585 if (semantics == spv::MemorySemanticsMaskNone)
5586 {
5587 return; //no-op
5588 }
5589 rr::Fence(MemoryOrder(semantics));
5590 }
5591
Ben Clayton32d47972019-04-19 17:08:15 -04005592 SpirvShader::EmitResult SpirvShader::EmitGroupNonUniform(InsnIterator insn, EmitState *state) const
5593 {
5594 auto &type = getType(Type::ID(insn.word(1)));
5595 Object::ID resultId = insn.word(2);
Ben Claytonb16c5862019-05-08 14:01:38 +01005596 auto scope = spv::Scope(GetConstScalarInt(insn.word(3)));
Ben Clayton32d47972019-04-19 17:08:15 -04005597 ASSERT_MSG(scope == spv::ScopeSubgroup, "Scope for Non Uniform Group Operations must be Subgroup for Vulkan 1.1");
5598
5599 auto &dst = state->routine->createIntermediate(resultId, type.sizeInComponents);
5600
5601 switch (insn.opcode())
5602 {
5603 case spv::OpGroupNonUniformElect:
5604 {
5605 // Result is true only in the active invocation with the lowest id
5606 // in the group, otherwise result is false.
5607 SIMD::Int active = state->activeLaneMask();
5608 // TODO: Would be nice if we could write this as:
5609 // elect = active & ~(active.Oxyz | active.OOxy | active.OOOx)
5610 auto v0111 = SIMD::Int(0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
5611 auto elect = active & ~(v0111 & (active.xxyz | active.xxxy | active.xxxx));
5612 dst.move(0, elect);
5613 break;
5614 }
5615 default:
5616 UNIMPLEMENTED("EmitGroupNonUniform op: %s", OpcodeName(type.opcode()).c_str());
5617 }
5618 return EmitResult::Continue;
5619 }
5620
Ben Claytone4605da2019-05-09 16:24:01 +01005621 SpirvShader::EmitResult SpirvShader::EmitArrayLength(InsnIterator insn, EmitState *state) const
5622 {
5623 auto resultTyId = Type::ID(insn.word(1));
5624 auto resultId = Object::ID(insn.word(2));
5625 auto structPtrId = Object::ID(insn.word(3));
5626 auto arrayFieldIdx = insn.word(4);
5627
5628 auto &resultType = getType(resultTyId);
5629 ASSERT(resultType.sizeInComponents == 1);
5630 ASSERT(resultType.definition.opcode() == spv::OpTypeInt);
5631
5632 auto &structPtrTy = getType(getObject(structPtrId).type);
5633 auto &structTy = getType(structPtrTy.element);
5634 auto &arrayTy = getType(structTy.definition.word(2 + arrayFieldIdx));
5635 ASSERT(arrayTy.definition.opcode() == spv::OpTypeRuntimeArray);
5636 auto &arrayElTy = getType(arrayTy.element);
5637
5638 auto &result = state->routine->createIntermediate(resultId, 1);
5639 auto structBase = GetPointerToData(structPtrId, 0, state->routine);
5640
5641 Decorations d = {};
5642 ApplyDecorationsForIdMember(&d, structPtrTy.element, arrayFieldIdx);
5643 ASSERT(d.HasOffset);
5644
5645 auto arrayBase = structBase + d.Offset;
5646 auto arraySizeInBytes = SIMD::Int(arrayBase.limit) - arrayBase.offsets();
5647 auto arrayLength = arraySizeInBytes / SIMD::Int(arrayElTy.sizeInComponents * sizeof(float));
5648
5649 result.move(0, SIMD::Int(arrayLength));
5650
5651 return EmitResult::Continue;
5652 }
5653
Ben Claytonb16c5862019-05-08 14:01:38 +01005654 uint32_t SpirvShader::GetConstScalarInt(Object::ID id) const
Ben Clayton32d47972019-04-19 17:08:15 -04005655 {
5656 auto &scopeObj = getObject(id);
5657 ASSERT(scopeObj.kind == Object::Kind::Constant);
5658 ASSERT(getType(scopeObj.type).sizeInComponents == 1);
Ben Claytonb16c5862019-05-08 14:01:38 +01005659 return scopeObj.constantValue[0];
Ben Clayton32d47972019-04-19 17:08:15 -04005660 }
5661
Chris Forbesc61271e2019-02-19 17:01:28 -08005662 void SpirvShader::emitEpilog(SpirvRoutine *routine) const
5663 {
5664 for (auto insn : *this)
5665 {
5666 switch (insn.opcode())
5667 {
5668 case spv::OpVariable:
5669 {
Ben Claytonaf973b62019-03-13 18:19:20 +00005670 Object::ID resultId = insn.word(2);
Chris Forbesc61271e2019-02-19 17:01:28 -08005671 auto &object = getObject(resultId);
Ben Clayton9a162482019-02-25 11:54:43 +00005672 auto &objectTy = getType(object.type);
5673 if (object.kind == Object::Kind::InterfaceVariable && objectTy.storageClass == spv::StorageClassOutput)
Chris Forbesc61271e2019-02-19 17:01:28 -08005674 {
Ben Clayton47747612019-04-04 16:27:35 +01005675 auto &dst = routine->getVariable(resultId);
Chris Forbesc61271e2019-02-19 17:01:28 -08005676 int offset = 0;
5677 VisitInterface(resultId,
5678 [&](Decorations const &d, AttribType type) {
5679 auto scalarSlot = d.Location << 2 | d.Component;
5680 routine->outputs[scalarSlot] = dst[offset++];
5681 });
5682 }
5683 break;
5684 }
5685 default:
5686 break;
5687 }
5688 }
5689 }
Ben Clayton76e9bc02019-02-26 15:02:18 +00005690
Ben Clayton64f78f52019-03-21 17:21:06 +00005691 SpirvShader::Block::Block(InsnIterator begin, InsnIterator end) : begin_(begin), end_(end)
5692 {
5693 // Default to a Simple, this may change later.
5694 kind = Block::Simple;
5695
5696 // Walk the instructions to find the last two of the block.
5697 InsnIterator insns[2];
5698 for (auto insn : *this)
5699 {
5700 insns[0] = insns[1];
5701 insns[1] = insn;
5702 }
5703
5704 switch (insns[1].opcode())
5705 {
5706 case spv::OpBranch:
5707 branchInstruction = insns[1];
5708 outs.emplace(Block::ID(branchInstruction.word(1)));
5709
5710 switch (insns[0].opcode())
5711 {
5712 case spv::OpLoopMerge:
5713 kind = Loop;
5714 mergeInstruction = insns[0];
5715 mergeBlock = Block::ID(mergeInstruction.word(1));
5716 continueTarget = Block::ID(mergeInstruction.word(2));
5717 break;
5718
5719 default:
5720 kind = Block::Simple;
5721 break;
5722 }
5723 break;
5724
5725 case spv::OpBranchConditional:
5726 branchInstruction = insns[1];
5727 outs.emplace(Block::ID(branchInstruction.word(2)));
5728 outs.emplace(Block::ID(branchInstruction.word(3)));
5729
5730 switch (insns[0].opcode())
5731 {
5732 case spv::OpSelectionMerge:
5733 kind = StructuredBranchConditional;
5734 mergeInstruction = insns[0];
5735 mergeBlock = Block::ID(mergeInstruction.word(1));
5736 break;
5737
5738 case spv::OpLoopMerge:
5739 kind = Loop;
5740 mergeInstruction = insns[0];
5741 mergeBlock = Block::ID(mergeInstruction.word(1));
5742 continueTarget = Block::ID(mergeInstruction.word(2));
5743 break;
5744
5745 default:
5746 kind = UnstructuredBranchConditional;
5747 break;
5748 }
5749 break;
5750
5751 case spv::OpSwitch:
5752 branchInstruction = insns[1];
5753 outs.emplace(Block::ID(branchInstruction.word(2)));
5754 for (uint32_t w = 4; w < branchInstruction.wordCount(); w += 2)
5755 {
5756 outs.emplace(Block::ID(branchInstruction.word(w)));
5757 }
5758
5759 switch (insns[0].opcode())
5760 {
5761 case spv::OpSelectionMerge:
5762 kind = StructuredSwitch;
5763 mergeInstruction = insns[0];
5764 mergeBlock = Block::ID(mergeInstruction.word(1));
5765 break;
5766
5767 default:
5768 kind = UnstructuredSwitch;
5769 break;
5770 }
5771 break;
5772
5773 default:
5774 break;
5775 }
5776 }
Ben Claytonc0cf68b2019-03-21 17:46:08 +00005777
Ben Clayton513ed1d2019-03-28 16:07:00 +00005778 bool SpirvShader::existsPath(Block::ID from, Block::ID to, Block::ID notPassingThrough) const
Ben Claytone747b3c2019-03-21 19:35:15 +00005779 {
5780 // TODO: Optimize: This can be cached on the block.
5781 Block::Set seen;
Ben Clayton513ed1d2019-03-28 16:07:00 +00005782 seen.emplace(notPassingThrough);
Ben Claytone747b3c2019-03-21 19:35:15 +00005783
5784 std::queue<Block::ID> pending;
5785 pending.emplace(from);
5786
5787 while (pending.size() > 0)
5788 {
5789 auto id = pending.front();
5790 pending.pop();
5791 for (auto out : getBlock(id).outs)
5792 {
5793 if (seen.count(out) != 0) { continue; }
5794 if (out == to) { return true; }
5795 pending.emplace(out);
5796 }
5797 seen.emplace(id);
5798 }
5799
5800 return false;
5801 }
5802
Ben Claytonc0cf68b2019-03-21 17:46:08 +00005803 void SpirvShader::EmitState::addOutputActiveLaneMaskEdge(Block::ID to, RValue<SIMD::Int> mask)
5804 {
5805 addActiveLaneMaskEdge(currentBlock, to, mask & activeLaneMask());
5806 }
5807
5808 void SpirvShader::EmitState::addActiveLaneMaskEdge(Block::ID from, Block::ID to, RValue<SIMD::Int> mask)
5809 {
5810 auto edge = Block::Edge{from, to};
5811 auto it = edgeActiveLaneMasks.find(edge);
5812 if (it == edgeActiveLaneMasks.end())
5813 {
5814 edgeActiveLaneMasks.emplace(edge, mask);
5815 }
5816 else
5817 {
5818 auto combined = it->second | mask;
5819 edgeActiveLaneMasks.erase(edge);
5820 edgeActiveLaneMasks.emplace(edge, combined);
5821 }
5822 }
5823
Ben Claytonfe3f0132019-03-26 11:10:16 +00005824 RValue<SIMD::Int> SpirvShader::GetActiveLaneMaskEdge(EmitState *state, Block::ID from, Block::ID to) const
Ben Claytonc0cf68b2019-03-21 17:46:08 +00005825 {
5826 auto edge = Block::Edge{from, to};
Ben Claytonfe3f0132019-03-26 11:10:16 +00005827 auto it = state->edgeActiveLaneMasks.find(edge);
5828 ASSERT_MSG(it != state->edgeActiveLaneMasks.end(), "Could not find edge %d -> %d", from.value(), to.value());
Ben Claytonc0cf68b2019-03-21 17:46:08 +00005829 return it->second;
5830 }
5831
Ben Clayton60f15ec2019-05-09 17:50:01 +01005832 VkShaderStageFlagBits SpirvShader::executionModelToStage(spv::ExecutionModel model)
5833 {
5834 switch (model)
5835 {
5836 case spv::ExecutionModelVertex: return VK_SHADER_STAGE_VERTEX_BIT;
5837 // case spv::ExecutionModelTessellationControl: return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
5838 // case spv::ExecutionModelTessellationEvaluation: return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
5839 // case spv::ExecutionModelGeometry: return VK_SHADER_STAGE_GEOMETRY_BIT;
5840 case spv::ExecutionModelFragment: return VK_SHADER_STAGE_FRAGMENT_BIT;
5841 case spv::ExecutionModelGLCompute: return VK_SHADER_STAGE_COMPUTE_BIT;
5842 // case spv::ExecutionModelKernel: return VkShaderStageFlagBits(0); // Not supported by vulkan.
5843 // case spv::ExecutionModelTaskNV: return VK_SHADER_STAGE_TASK_BIT_NV;
5844 // case spv::ExecutionModelMeshNV: return VK_SHADER_STAGE_MESH_BIT_NV;
5845 // case spv::ExecutionModelRayGenerationNV: return VK_SHADER_STAGE_RAYGEN_BIT_NV;
5846 // case spv::ExecutionModelIntersectionNV: return VK_SHADER_STAGE_INTERSECTION_BIT_NV;
5847 // case spv::ExecutionModelAnyHitNV: return VK_SHADER_STAGE_ANY_HIT_BIT_NV;
5848 // case spv::ExecutionModelClosestHitNV: return VK_SHADER_STAGE_CLOSEST_HIT_BIT_NV;
5849 // case spv::ExecutionModelMissNV: return VK_SHADER_STAGE_MISS_BIT_NV;
5850 // case spv::ExecutionModelCallableNV: return VK_SHADER_STAGE_CALLABLE_BIT_NV;
5851 default:
5852 UNSUPPORTED("ExecutionModel: %d", int(model));
5853 return VkShaderStageFlagBits(0);
5854 }
5855 }
5856
Ben Clayton76e9bc02019-02-26 15:02:18 +00005857 SpirvRoutine::SpirvRoutine(vk::PipelineLayout const *pipelineLayout) :
5858 pipelineLayout(pipelineLayout)
5859 {
5860 }
5861
Chris Forbesc25b8072018-12-10 15:10:39 -08005862}