// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15#include "ComputeProgram.hpp"
Chris Forbes548e3662019-04-25 10:00:06 -070016#include "Constants.hpp"
Ben Claytonf2be26a2019-03-08 12:02:05 +000017
18#include "Vulkan/VkDebug.hpp"
19#include "Vulkan/VkPipelineLayout.hpp"
20
Ben Claytonecfeede2019-05-08 08:51:01 +010021#include <queue>
22
namespace
{
	// Component indices used throughout this file to address the X, Y and Z
	// entries of three-dimensional IDs and sizes (arrays and vector lanes).
	enum
	{
		X = 0,
		Y = 1,
		Z = 2
	};
}  // anonymous namespace
27
Ben Claytonf2be26a2019-03-08 12:02:05 +000028namespace sw
29{
Nicolas Capens09591b82019-04-08 22:51:08 -040030 ComputeProgram::ComputeProgram(SpirvShader const *shader, vk::PipelineLayout const *pipelineLayout, const vk::DescriptorSet::Bindings &descriptorSets)
Ben Claytonf2be26a2019-03-08 12:02:05 +000031 : data(Arg<0>()),
32 routine(pipelineLayout),
33 shader(shader),
Nicolas Capens09591b82019-04-08 22:51:08 -040034 pipelineLayout(pipelineLayout),
35 descriptorSets(descriptorSets)
Ben Claytonf2be26a2019-03-08 12:02:05 +000036 {
37 }
38
39 ComputeProgram::~ComputeProgram()
40 {
41 }
42
43 void ComputeProgram::generate()
44 {
45 shader->emitProlog(&routine);
46 emit();
47 shader->emitEpilog(&routine);
48 }
49
Ben Clayton13dcbec2019-05-08 08:43:55 +010050 void ComputeProgram::setWorkgroupBuiltins(Int workgroupID[3])
Ben Claytonf2be26a2019-03-08 12:02:05 +000051 {
Ben Claytonc2bb50b2019-03-13 14:28:32 +000052 setInputBuiltin(spv::BuiltInNumWorkgroups, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
Ben Claytonf2be26a2019-03-08 12:02:05 +000053 {
Ben Clayton13dcbec2019-05-08 08:43:55 +010054 auto numWorkgroups = *Pointer<Int4>(data + OFFSET(Data, numWorkgroups));
Ben Claytonf2be26a2019-03-08 12:02:05 +000055 for (uint32_t component = 0; component < builtin.SizeInComponents; component++)
56 {
57 value[builtin.FirstComponent + component] =
Ben Claytonc2bb50b2019-03-13 14:28:32 +000058 As<SIMD::Float>(SIMD::Int(Extract(numWorkgroups, component)));
Ben Claytonf2be26a2019-03-08 12:02:05 +000059 }
60 });
61
Chris Forbesf2564652019-03-19 09:06:19 -070062 setInputBuiltin(spv::BuiltInWorkgroupId, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
63 {
64 for (uint32_t component = 0; component < builtin.SizeInComponents; component++)
65 {
66 value[builtin.FirstComponent + component] =
Ben Clayton13dcbec2019-05-08 08:43:55 +010067 As<SIMD::Float>(SIMD::Int(workgroupID[component]));
Chris Forbesf2564652019-03-19 09:06:19 -070068 }
69 });
70
Ben Claytonc2bb50b2019-03-13 14:28:32 +000071 setInputBuiltin(spv::BuiltInWorkgroupSize, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
Ben Claytonf2be26a2019-03-08 12:02:05 +000072 {
Ben Clayton13dcbec2019-05-08 08:43:55 +010073 auto workgroupSize = *Pointer<Int4>(data + OFFSET(Data, workgroupSize));
Ben Claytonf2be26a2019-03-08 12:02:05 +000074 for (uint32_t component = 0; component < builtin.SizeInComponents; component++)
75 {
76 value[builtin.FirstComponent + component] =
Ben Claytonc2bb50b2019-03-13 14:28:32 +000077 As<SIMD::Float>(SIMD::Int(Extract(workgroupSize, component)));
Ben Claytonf2be26a2019-03-08 12:02:05 +000078 }
79 });
80
Ben Claytonc2bb50b2019-03-13 14:28:32 +000081 setInputBuiltin(spv::BuiltInNumSubgroups, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
Ben Clayton5ceec2c2019-03-13 09:16:50 +000082 {
83 ASSERT(builtin.SizeInComponents == 1);
Ben Clayton13dcbec2019-05-08 08:43:55 +010084 auto subgroupsPerWorkgroup = *Pointer<Int>(data + OFFSET(Data, subgroupsPerWorkgroup));
85 value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(subgroupsPerWorkgroup));
Ben Clayton5ceec2c2019-03-13 09:16:50 +000086 });
87
Ben Claytonc2bb50b2019-03-13 14:28:32 +000088 setInputBuiltin(spv::BuiltInSubgroupSize, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
Ben Clayton5ceec2c2019-03-13 09:16:50 +000089 {
90 ASSERT(builtin.SizeInComponents == 1);
Ben Clayton13dcbec2019-05-08 08:43:55 +010091 auto invocationsPerSubgroup = *Pointer<Int>(data + OFFSET(Data, invocationsPerSubgroup));
92 value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(invocationsPerSubgroup));
Ben Clayton5ceec2c2019-03-13 09:16:50 +000093 });
94
Ben Claytonc2bb50b2019-03-13 14:28:32 +000095 setInputBuiltin(spv::BuiltInSubgroupLocalInvocationId, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
Ben Clayton5ceec2c2019-03-13 09:16:50 +000096 {
97 ASSERT(builtin.SizeInComponents == 1);
Ben Claytonc2bb50b2019-03-13 14:28:32 +000098 value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(0, 1, 2, 3));
Ben Clayton5ceec2c2019-03-13 09:16:50 +000099 });
Ben Clayton2cd983d2019-05-10 11:30:09 +0100100
101 setInputBuiltin(spv::BuiltInDeviceIndex, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
102 {
103 ASSERT(builtin.SizeInComponents == 1);
104 // Only a single physical device is supported.
105 value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
106 });
Ben Clayton13dcbec2019-05-08 08:43:55 +0100107 }
Ben Claytonf2be26a2019-03-08 12:02:05 +0000108
Ben Clayton13dcbec2019-05-08 08:43:55 +0100109 void ComputeProgram::setSubgroupBuiltins(Int workgroupID[3], SIMD::Int localInvocationIndex, Int subgroupIndex)
110 {
111 Int4 numWorkgroups = *Pointer<Int4>(data + OFFSET(Data, numWorkgroups));
112 Int4 workgroupSize = *Pointer<Int4>(data + OFFSET(Data, workgroupSize));
113
114 // TODO: Fix Int4 swizzles so we can just use workgroupSize.x, workgroupSize.y.
115 Int workgroupSizeX = Extract(workgroupSize, X);
116 Int workgroupSizeY = Extract(workgroupSize, Y);
117
118 SIMD::Int localInvocationID[3];
Ben Claytonf2be26a2019-03-08 12:02:05 +0000119 {
Ben Clayton13dcbec2019-05-08 08:43:55 +0100120 SIMD::Int idx = localInvocationIndex;
121 localInvocationID[Z] = idx / SIMD::Int(workgroupSizeX * workgroupSizeY);
122 idx -= localInvocationID[Z] * SIMD::Int(workgroupSizeX * workgroupSizeY); // modulo
123 localInvocationID[Y] = idx / SIMD::Int(workgroupSizeX);
124 idx -= localInvocationID[Y] * SIMD::Int(workgroupSizeX); // modulo
125 localInvocationID[X] = idx;
126 }
127
128 setInputBuiltin(spv::BuiltInLocalInvocationIndex, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
129 {
130 ASSERT(builtin.SizeInComponents == 1);
131 value[builtin.FirstComponent] = As<SIMD::Float>(localInvocationIndex);
132 });
133
134 setInputBuiltin(spv::BuiltInSubgroupId, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
135 {
136 ASSERT(builtin.SizeInComponents == 1);
137 value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(subgroupIndex));
138 });
139
140 setInputBuiltin(spv::BuiltInLocalInvocationId, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
141 {
142 for (uint32_t component = 0; component < builtin.SizeInComponents; component++)
143 {
144 value[builtin.FirstComponent + component] =
145 As<SIMD::Float>(localInvocationID[component]);
146 }
147 });
148
149 setInputBuiltin(spv::BuiltInGlobalInvocationId, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
150 {
151 SIMD::Int wgID = 0;
152 wgID = Insert(wgID, workgroupID[X], X);
153 wgID = Insert(wgID, workgroupID[Y], Y);
154 wgID = Insert(wgID, workgroupID[Z], Z);
155 auto localBase = workgroupSize * wgID;
156 for (uint32_t component = 0; component < builtin.SizeInComponents; component++)
157 {
158 auto globalInvocationID = SIMD::Int(Extract(localBase, component)) + localInvocationID[component];
159 value[builtin.FirstComponent + component] = As<SIMD::Float>(globalInvocationID);
160 }
161 });
162 }
163
164 void ComputeProgram::emit()
165 {
Ben Claytonecfeede2019-05-08 08:51:01 +0100166 Int workgroupX = Arg<1>();
167 Int workgroupY = Arg<2>();
168 Int workgroupZ = Arg<3>();
169 Pointer<Byte> workgroupMemory = Arg<4>();
170 Int firstSubgroup = Arg<5>();
171 Int subgroupCount = Arg<6>();
172
Ben Clayton13dcbec2019-05-08 08:43:55 +0100173 routine.descriptorSets = data + OFFSET(Data, descriptorSets);
174 routine.descriptorDynamicOffsets = data + OFFSET(Data, descriptorDynamicOffsets);
175 routine.pushConstants = data + OFFSET(Data, pushConstants);
176 routine.constants = *Pointer<Pointer<Byte>>(data + OFFSET(Data, constants));
Ben Claytonecfeede2019-05-08 08:51:01 +0100177 routine.workgroupMemory = workgroupMemory;
Ben Clayton13dcbec2019-05-08 08:43:55 +0100178
179 Int invocationsPerWorkgroup = *Pointer<Int>(data + OFFSET(Data, invocationsPerWorkgroup));
180
181 Int workgroupID[3] = {workgroupX, workgroupY, workgroupZ};
182 setWorkgroupBuiltins(workgroupID);
183
184 For(Int i = 0, i < subgroupCount, i++)
185 {
186 auto subgroupIndex = firstSubgroup + i;
187
Ben Claytonc2bb50b2019-03-13 14:28:32 +0000188 // TODO: Replace SIMD::Int(0, 1, 2, 3) with SIMD-width equivalent
189 auto localInvocationIndex = SIMD::Int(subgroupIndex * SIMD::Width) + SIMD::Int(0, 1, 2, 3);
Ben Claytonf2be26a2019-03-08 12:02:05 +0000190
Ben Clayton13dcbec2019-05-08 08:43:55 +0100191 // Disable lanes where (invocationIDs >= invocationsPerWorkgroup)
192 auto activeLaneMask = CmpLT(localInvocationIndex, SIMD::Int(invocationsPerWorkgroup));
Ben Clayton49d81582019-03-12 20:05:04 +0000193
Ben Clayton13dcbec2019-05-08 08:43:55 +0100194 setSubgroupBuiltins(workgroupID, localInvocationIndex, subgroupIndex);
Ben Claytonf2be26a2019-03-08 12:02:05 +0000195
Nicolas Capens09591b82019-04-08 22:51:08 -0400196 shader->emit(&routine, activeLaneMask, descriptorSets);
Ben Claytonf2be26a2019-03-08 12:02:05 +0000197 }
198 }
199
Ben Claytonc2bb50b2019-03-13 14:28:32 +0000200 void ComputeProgram::setInputBuiltin(spv::BuiltIn id, std::function<void(const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)> cb)
Ben Claytonf2be26a2019-03-08 12:02:05 +0000201 {
202 auto it = shader->inputBuiltins.find(id);
203 if (it != shader->inputBuiltins.end())
204 {
205 const auto& builtin = it->second;
Ben Clayton47747612019-04-04 16:27:35 +0100206 cb(builtin, routine.getVariable(builtin.Id));
Ben Claytonf2be26a2019-03-08 12:02:05 +0000207 }
208 }
209
210 void ComputeProgram::run(
Ben Clayton225a1302019-04-02 12:28:22 +0100211 vk::DescriptorSet::Bindings const &descriptorSets,
212 vk::DescriptorSet::DynamicOffsets const &descriptorDynamicOffsets,
213 PushConstantStorage const &pushConstants,
Chris Forbes4a4c2592019-05-13 08:53:36 -0700214 uint32_t baseGroupX, uint32_t baseGroupY, uint32_t baseGroupZ,
Ben Claytonf2be26a2019-03-08 12:02:05 +0000215 uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ)
216 {
Ben Clayton13dcbec2019-05-08 08:43:55 +0100217 auto &modes = shader->getModes();
218
219 auto invocationsPerSubgroup = SIMD::Width;
220 auto invocationsPerWorkgroup = modes.WorkgroupSizeX * modes.WorkgroupSizeY * modes.WorkgroupSizeZ;
221 auto subgroupsPerWorkgroup = (invocationsPerWorkgroup + invocationsPerSubgroup - 1) / invocationsPerSubgroup;
Ben Claytonf2be26a2019-03-08 12:02:05 +0000222
Ben Claytonecd38482019-04-19 17:11:08 -0400223 // We're sharing a buffer here across all workgroups.
Ben Claytonecfeede2019-05-08 08:51:01 +0100224 // We can only do this because we know a single workgroup is in flight
225 // at any time.
Ben Claytonecd38482019-04-19 17:11:08 -0400226 std::vector<uint8_t> workgroupMemory(shader->workgroupMemory.size());
227
Ben Claytonf2be26a2019-03-08 12:02:05 +0000228 Data data;
229 data.descriptorSets = descriptorSets;
Ben Clayton225a1302019-04-02 12:28:22 +0100230 data.descriptorDynamicOffsets = descriptorDynamicOffsets;
Ben Claytonc2bb50b2019-03-13 14:28:32 +0000231 data.numWorkgroups[X] = groupCountX;
232 data.numWorkgroups[Y] = groupCountY;
233 data.numWorkgroups[Z] = groupCountZ;
Ben Claytonf2be26a2019-03-08 12:02:05 +0000234 data.numWorkgroups[3] = 0;
Ben Clayton13dcbec2019-05-08 08:43:55 +0100235 data.workgroupSize[X] = modes.WorkgroupSizeX;
236 data.workgroupSize[Y] = modes.WorkgroupSizeY;
237 data.workgroupSize[Z] = modes.WorkgroupSizeZ;
238 data.workgroupSize[3] = 0;
239 data.invocationsPerSubgroup = invocationsPerSubgroup;
240 data.invocationsPerWorkgroup = invocationsPerWorkgroup;
241 data.subgroupsPerWorkgroup = subgroupsPerWorkgroup;
Chris Forbesa30de542019-03-18 18:51:55 -0700242 data.pushConstants = pushConstants;
Chris Forbes548e3662019-04-25 10:00:06 -0700243 data.constants = &sw::constants;
Ben Claytonf2be26a2019-03-08 12:02:05 +0000244
Chris Forbes4a4c2592019-05-13 08:53:36 -0700245 for (uint32_t groupZ = baseGroupZ; groupZ < baseGroupZ + groupCountZ; groupZ++)
Ben Claytonf2be26a2019-03-08 12:02:05 +0000246 {
Chris Forbes4a4c2592019-05-13 08:53:36 -0700247 for (uint32_t groupY = baseGroupY; groupY < baseGroupY + groupCountY; groupY++)
Ben Claytonf2be26a2019-03-08 12:02:05 +0000248 {
Chris Forbes4a4c2592019-05-13 08:53:36 -0700249 for (uint32_t groupX = baseGroupX; groupX < baseGroupX + groupCountX; groupX++)
Ben Claytonf2be26a2019-03-08 12:02:05 +0000250 {
Ben Claytonecfeede2019-05-08 08:51:01 +0100251
252 // TODO(bclayton): Split work across threads.
253 using Coroutine = std::unique_ptr<rr::Stream<SpirvShader::YieldResult>>;
254 std::queue<Coroutine> coroutines;
255
256 if (shader->getModes().ContainsControlBarriers)
257 {
258 // Make a function call per subgroup so each subgroup
259 // can yield, bringing all subgroups to the barrier
260 // together.
261 for(int subgroupIndex = 0; subgroupIndex < subgroupsPerWorkgroup; subgroupIndex++)
262 {
263 auto coroutine = (*this)(&data, groupX, groupY, groupZ, workgroupMemory.data(), subgroupIndex, 1);
264 coroutines.push(std::move(coroutine));
265 }
266 }
267 else
268 {
269 auto coroutine = (*this)(&data, groupX, groupY, groupZ, workgroupMemory.data(), 0, subgroupsPerWorkgroup);
270 coroutines.push(std::move(coroutine));
271 }
272
273 while (coroutines.size() > 0)
274 {
275 auto coroutine = std::move(coroutines.front());
276 coroutines.pop();
277
278 SpirvShader::YieldResult result;
279 if (coroutine->await(result))
280 {
281 // TODO: Consider result (when the enum is more than 1 entry).
282 coroutines.push(std::move(coroutine));
283 }
284 }
285
286 } // groupX
287 } // groupY
288 } // groupZ
Ben Claytonf2be26a2019-03-08 12:02:05 +0000289 }
Ben Clayton13dcbec2019-05-08 08:43:55 +0100290
291} // namespace sw