//
// Copyright 2012 Francisco Jerez
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//
22
#include "core/kernel.hpp"

#include <algorithm>
#include <memory>

#include "core/resource.hpp"
#include "util/factor.hpp"
#include "util/u_math.h"
#include "pipe/p_context.h"

using namespace clover;
30
Francisco Jerezc4578d22014-02-18 15:07:11 +010031kernel::kernel(clover::program &prog, const std::string &name,
Francisco Jerez35307f52013-09-17 23:20:11 -070032 const std::vector<module::argument> &margs) :
Francisco Jereze9a4e742014-08-16 16:25:34 +030033 program(prog), _name(name), exec(*this),
34 program_ref(prog._kernel_ref_counter) {
Francisco Jerez7a9bbff2013-09-16 21:50:40 -070035 for (auto &marg : margs) {
Francisco Jerezbf89a972014-10-08 17:39:35 +030036 if (marg.semantic == module::argument::general)
37 _args.emplace_back(argument::create(marg));
Francisco Jerezc6db1b32012-04-20 16:56:19 +020038 }
Karol Herbstadbfff62020-09-02 20:36:41 +020039 for (auto &dev : prog.devices()) {
40 auto &m = prog.build(dev).binary;
41 auto msym = find(name_equals(name), m.syms);
42 const auto f = id_type_equals(msym.section, module::section::data_constant);
43 if (!any_of(f, m.secs))
44 continue;
45
46 auto mconst = find(f, m.secs);
47 auto rb = std::make_unique<root_buffer>(prog.context(),
48 CL_MEM_COPY_HOST_PTR | CL_MEM_READ_ONLY,
49 mconst.size, mconst.data.data());
50 _constant_buffers.emplace(&dev, std::move(rb));
51 }
Francisco Jerezc6db1b32012-04-20 16:56:19 +020052}
53
Francisco Jerez7a9bbff2013-09-16 21:50:40 -070054template<typename V>
55static inline std::vector<uint>
56pad_vector(command_queue &q, const V &v, uint x) {
57 std::vector<uint> w { v.begin(), v.end() };
Francisco Jerezc4578d22014-02-18 15:07:11 +010058 w.resize(q.device().max_block_size().size(), x);
Francisco Jerezc6db1b32012-04-20 16:56:19 +020059 return w;
60}
61
62void
Francisco Jerez35307f52013-09-17 23:20:11 -070063kernel::launch(command_queue &q,
64 const std::vector<size_t> &grid_offset,
65 const std::vector<size_t> &grid_size,
66 const std::vector<size_t> &block_size) {
Francisco Jerez19424902016-05-17 16:03:13 +020067 const auto m = program().build(q.device()).binary;
Francisco Jerez7a9bbff2013-09-16 21:50:40 -070068 const auto reduced_grid_size =
69 map(divides(), grid_size, block_size);
Francisco Jerezbf89a972014-10-08 17:39:35 +030070 void *st = exec.bind(&q, grid_offset);
Hans de Goede4d02e912016-03-14 15:01:05 +010071 struct pipe_grid_info info = {};
Francisco Jerez7a9bbff2013-09-16 21:50:40 -070072
73 // The handles are created during exec_context::bind(), so we need make
74 // sure to call exec_context::bind() before retrieving them.
Karol Herbst6e035c02020-03-10 22:41:26 +000075 std::vector<uint32_t *> g_handles = map([&](size_t h) {
76 return (uint32_t *)&exec.input[h];
Francisco Jerez7d617692013-10-06 13:49:05 -070077 }, exec.g_handles);
Francisco Jerezc6db1b32012-04-20 16:56:19 +020078
79 q.pipe->bind_compute_state(q.pipe, st);
Brian Paul93e66942013-09-16 10:21:07 -060080 q.pipe->bind_sampler_states(q.pipe, PIPE_SHADER_COMPUTE,
81 0, exec.samplers.size(),
82 exec.samplers.data());
83
David Heidelberger2901e2e2013-10-23 21:25:12 +020084 q.pipe->set_sampler_views(q.pipe, PIPE_SHADER_COMPUTE, 0,
85 exec.sviews.size(), exec.sviews.data());
Karol Herbst3aead712020-10-07 23:08:43 +020086 q.pipe->set_shader_images(q.pipe, PIPE_SHADER_COMPUTE, 0,
87 exec.iviews.size(), exec.iviews.data());
Francisco Jerezc6db1b32012-04-20 16:56:19 +020088 q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(),
Francisco Jerez35307f52013-09-17 23:20:11 -070089 exec.resources.data());
Francisco Jerezc6db1b32012-04-20 16:56:19 +020090 q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(),
91 exec.g_buffers.data(), g_handles.data());
92
Samuel Pitoisetbfd695e2016-01-12 18:00:00 +010093 // Fill information for the launch_grid() call.
Hans de Goedeef8e50a2016-04-22 14:47:05 +020094 info.work_dim = grid_size.size();
Serge Martina4cff182016-02-13 23:39:22 +010095 copy(pad_vector(q, block_size, 1), info.block);
96 copy(pad_vector(q, reduced_grid_size, 1), info.grid);
97 info.pc = find(name_equals(_name), m.syms).offset;
Samuel Pitoisetbfd695e2016-01-12 18:00:00 +010098 info.input = exec.input.data();
99
100 q.pipe->launch_grid(q.pipe, &info);
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200101
102 q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(), NULL, NULL);
103 q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(), NULL);
Karol Herbst3aead712020-10-07 23:08:43 +0200104 q.pipe->set_shader_images(q.pipe, PIPE_SHADER_COMPUTE, 0,
105 exec.iviews.size(), NULL);
David Heidelberger2901e2e2013-10-23 21:25:12 +0200106 q.pipe->set_sampler_views(q.pipe, PIPE_SHADER_COMPUTE, 0,
107 exec.sviews.size(), NULL);
Brian Paul93e66942013-09-16 10:21:07 -0600108 q.pipe->bind_sampler_states(q.pipe, PIPE_SHADER_COMPUTE, 0,
109 exec.samplers.size(), NULL);
Bas Nieuwenhuizenbe5899d2016-03-24 23:11:03 +0100110
111 q.pipe->memory_barrier(q.pipe, PIPE_BARRIER_GLOBAL_BUFFER);
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200112 exec.unbind();
113}
114
115size_t
Francisco Jerez35307f52013-09-17 23:20:11 -0700116kernel::mem_local() const {
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200117 size_t sz = 0;
118
Francisco Jerez7a9bbff2013-09-16 21:50:40 -0700119 for (auto &arg : args()) {
120 if (dynamic_cast<local_argument *>(&arg))
121 sz += arg.storage();
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200122 }
123
124 return sz;
125}
126
127size_t
Francisco Jerez35307f52013-09-17 23:20:11 -0700128kernel::mem_private() const {
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200129 return 0;
130}
131
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200132const std::string &
Francisco Jerez35307f52013-09-17 23:20:11 -0700133kernel::name() const {
Francisco Jerez8e14b822013-09-17 23:13:48 -0700134 return _name;
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200135}
136
137std::vector<size_t>
Francisco Jerezbf045bf2013-11-04 11:26:13 -0800138kernel::optimal_block_size(const command_queue &q,
139 const std::vector<size_t> &grid_size) const {
140 return factor::find_grid_optimal_factor<size_t>(
Francisco Jerezc4578d22014-02-18 15:07:11 +0100141 q.device().max_threads_per_block(), q.device().max_block_size(),
Francisco Jerezbf045bf2013-11-04 11:26:13 -0800142 grid_size);
143}
144
145std::vector<size_t>
146kernel::required_block_size() const {
Serge Martinc04d5e72020-09-27 15:45:33 +0200147 return find(name_equals(_name), program().symbols()).reqd_work_group_size;
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200148}
149
Francisco Jerez7a9bbff2013-09-16 21:50:40 -0700150kernel::argument_range
151kernel::args() {
152 return map(derefs(), _args);
153}
154
155kernel::const_argument_range
156kernel::args() const {
157 return map(derefs(), _args);
158}
159
Serge Martin9aea6e32020-05-09 23:13:48 +0200160std::vector<clover::module::arg_info>
161kernel::args_infos() {
162 std::vector<clover::module::arg_info> infos;
163 for (auto &marg: find(name_equals(_name), program().symbols()).args)
164 if (marg.semantic == clover::module::argument::general)
165 infos.emplace_back(marg.info);
166
167 return infos;
168}
169
Francisco Jerez35307f52013-09-17 23:20:11 -0700170const module &
171kernel::module(const command_queue &q) const {
Francisco Jerez19424902016-05-17 16:03:13 +0200172 return program().build(q.device()).binary;
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200173}
174
Francisco Jerez35307f52013-09-17 23:20:11 -0700175kernel::exec_context::exec_context(kernel &kern) :
Francisco Jerez7a9bbff2013-09-16 21:50:40 -0700176 kern(kern), q(NULL), mem_local(0), st(NULL), cs() {
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200177}
178
Francisco Jerez35307f52013-09-17 23:20:11 -0700179kernel::exec_context::~exec_context() {
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200180 if (st)
181 q->pipe->delete_compute_state(q->pipe, st);
182}
183
184void *
Francisco Jerezbf89a972014-10-08 17:39:35 +0300185kernel::exec_context::bind(intrusive_ptr<command_queue> _q,
186 const std::vector<size_t> &grid_offset) {
Francisco Jerez8e14b822013-09-17 23:13:48 -0700187 std::swap(q, _q);
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200188
Francisco Jereza3dcab42013-07-21 00:43:18 +0200189 // Bind kernel arguments.
Francisco Jerez19424902016-05-17 16:03:13 +0200190 auto &m = kern.program().build(q->device()).binary;
Karol Herbstc8cd8e22019-05-10 09:24:42 +0200191 auto msym = find(name_equals(kern.name()), m.syms);
192 auto margs = msym.args;
Karol Herbstadbfff62020-09-02 20:36:41 +0200193 auto msec = find(id_type_equals(msym.section, module::section::text_executable), m.secs);
Francisco Jerezbf89a972014-10-08 17:39:35 +0300194 auto explicit_arg = kern._args.begin();
Francisco Jerez7a9bbff2013-09-16 21:50:40 -0700195
Francisco Jerezbf89a972014-10-08 17:39:35 +0300196 for (auto &marg : margs) {
197 switch (marg.semantic) {
198 case module::argument::general:
199 (*(explicit_arg++))->bind(*this, marg);
Francisco Jerez2286edc2014-10-12 11:32:48 +0300200 break;
Francisco Jerezbf89a972014-10-08 17:39:35 +0300201
202 case module::argument::grid_dimension: {
203 const cl_uint dimension = grid_offset.size();
204 auto arg = argument::create(marg);
205
206 arg->set(sizeof(dimension), &dimension);
207 arg->bind(*this, marg);
Francisco Jerez2286edc2014-10-12 11:32:48 +0300208 break;
Francisco Jerezbf89a972014-10-08 17:39:35 +0300209 }
210 case module::argument::grid_offset: {
Jan Vesely40c6d542016-05-15 20:08:09 -0400211 for (cl_uint x : pad_vector(*q, grid_offset, 0)) {
Francisco Jerezbf89a972014-10-08 17:39:35 +0300212 auto arg = argument::create(marg);
213
214 arg->set(sizeof(x), &x);
215 arg->bind(*this, marg);
216 }
Francisco Jerez2286edc2014-10-12 11:32:48 +0300217 break;
Francisco Jerezbf89a972014-10-08 17:39:35 +0300218 }
Zoltan Gilian9ef5b7a2015-07-27 11:34:07 +0200219 case module::argument::image_size: {
220 auto img = dynamic_cast<image_argument &>(**(explicit_arg - 1)).get();
221 std::vector<cl_uint> image_size{
222 static_cast<cl_uint>(img->width()),
223 static_cast<cl_uint>(img->height()),
224 static_cast<cl_uint>(img->depth())};
225 for (auto x : image_size) {
226 auto arg = argument::create(marg);
227
228 arg->set(sizeof(x), &x);
229 arg->bind(*this, marg);
230 }
231 break;
232 }
233 case module::argument::image_format: {
234 auto img = dynamic_cast<image_argument &>(**(explicit_arg - 1)).get();
235 cl_image_format fmt = img->format();
236 std::vector<cl_uint> image_format{
237 static_cast<cl_uint>(fmt.image_channel_data_type),
238 static_cast<cl_uint>(fmt.image_channel_order)};
239 for (auto x : image_format) {
240 auto arg = argument::create(marg);
241
242 arg->set(sizeof(x), &x);
243 arg->bind(*this, marg);
244 }
245 break;
246 }
Karol Herbstadbfff62020-09-02 20:36:41 +0200247 case module::argument::constant_buffer: {
248 auto arg = argument::create(marg);
249 cl_mem buf = kern._constant_buffers.at(&q->device()).get();
250 arg->set(q->device().address_bits() / 8, &buf);
251 arg->bind(*this, marg);
252 break;
253 }
Francisco Jerezbf89a972014-10-08 17:39:35 +0300254 }
255 }
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200256
257 // Create a new compute state if anything changed.
Francisco Jerez8e14b822013-09-17 23:13:48 -0700258 if (!st || q != _q ||
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200259 cs.req_local_mem != mem_local ||
260 cs.req_input_mem != input.size()) {
261 if (st)
Francisco Jerez8e14b822013-09-17 23:13:48 -0700262 _q->pipe->delete_compute_state(_q->pipe, st);
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200263
Bas Nieuwenhuizenea8f4a62016-03-17 14:15:39 +0100264 cs.ir_type = q->device().ir_format();
EdBd8f817a2015-04-23 20:13:51 +0200265 cs.prog = &(msec.data[0]);
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200266 cs.req_local_mem = mem_local;
267 cs.req_input_mem = input.size();
268 st = q->pipe->create_compute_state(q->pipe, &cs);
Jan Vesely154fbd02018-07-17 02:07:45 -0400269 if (!st) {
Pierre Moreau1c9fdce2019-02-02 15:33:51 +0100270 unbind(); // Cleanup
271 throw error(CL_OUT_OF_RESOURCES);
Jan Vesely154fbd02018-07-17 02:07:45 -0400272 }
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200273 }
274
275 return st;
276}
277
278void
Francisco Jerez35307f52013-09-17 23:20:11 -0700279kernel::exec_context::unbind() {
Francisco Jerez7a9bbff2013-09-16 21:50:40 -0700280 for (auto &arg : kern.args())
281 arg.unbind(*this);
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200282
283 input.clear();
284 samplers.clear();
285 sviews.clear();
Karol Herbst3aead712020-10-07 23:08:43 +0200286 iviews.clear();
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200287 resources.clear();
288 g_buffers.clear();
289 g_handles.clear();
290 mem_local = 0;
291}
292
Francisco Jerez829caf42013-07-21 01:06:13 +0200293namespace {
294 template<typename T>
295 std::vector<uint8_t>
296 bytes(const T& x) {
297 return { (uint8_t *)&x, (uint8_t *)&x + sizeof(x) };
298 }
299
300 ///
301 /// Transform buffer \a v from the native byte order into the byte
302 /// order specified by \a e.
303 ///
304 template<typename T>
305 void
306 byteswap(T &v, pipe_endian e) {
307 if (PIPE_ENDIAN_NATIVE != e)
308 std::reverse(v.begin(), v.end());
309 }
310
Francisco Jerezdf530822013-07-21 01:14:54 +0200311 ///
312 /// Pad buffer \a v to the next multiple of \a n.
313 ///
314 template<typename T>
315 void
316 align(T &v, size_t n) {
317 v.resize(util_align_npot(v.size(), n));
318 }
319
Francisco Jerezf64c0ca2013-07-22 23:08:46 +0200320 bool
321 msb(const std::vector<uint8_t> &s) {
322 if (PIPE_ENDIAN_NATIVE == PIPE_ENDIAN_LITTLE)
323 return s.back() & 0x80;
324 else
325 return s.front() & 0x80;
326 }
327
328 ///
329 /// Resize buffer \a v to size \a n using sign or zero extension
330 /// according to \a ext.
331 ///
332 template<typename T>
333 void
Francisco Jerez35307f52013-09-17 23:20:11 -0700334 extend(T &v, enum module::argument::ext_type ext, size_t n) {
Francisco Jerezf64c0ca2013-07-22 23:08:46 +0200335 const size_t m = std::min(v.size(), n);
336 const bool sign_ext = (ext == module::argument::sign_ext);
337 const uint8_t fill = (sign_ext && msb(v) ? ~0 : 0);
338 T w(n, fill);
339
340 if (PIPE_ENDIAN_NATIVE == PIPE_ENDIAN_LITTLE)
341 std::copy_n(v.begin(), m, w.begin());
342 else
343 std::copy_n(v.end() - m, m, w.end() - m);
344
345 std::swap(v, w);
346 }
347
Francisco Jerez829caf42013-07-21 01:06:13 +0200348 ///
349 /// Append buffer \a w to \a v.
350 ///
351 template<typename T>
352 void
353 insert(T &v, const T &w) {
354 v.insert(v.end(), w.begin(), w.end());
355 }
356
357 ///
358 /// Append \a n elements to the end of buffer \a v.
359 ///
360 template<typename T>
361 size_t
362 allocate(T &v, size_t n) {
363 size_t pos = v.size();
364 v.resize(pos + n);
365 return pos;
366 }
367}
368
Francisco Jerezbf89a972014-10-08 17:39:35 +0300369std::unique_ptr<kernel::argument>
370kernel::argument::create(const module::argument &marg) {
Jan Vesely3a18fc62014-12-05 19:05:30 -0500371 switch (marg.type) {
372 case module::argument::scalar:
373 return std::unique_ptr<kernel::argument>(new scalar_argument(marg.size));
Francisco Jerezbf89a972014-10-08 17:39:35 +0300374
Jan Vesely3a18fc62014-12-05 19:05:30 -0500375 case module::argument::global:
376 return std::unique_ptr<kernel::argument>(new global_argument);
Francisco Jerezbf89a972014-10-08 17:39:35 +0300377
Jan Vesely3a18fc62014-12-05 19:05:30 -0500378 case module::argument::local:
379 return std::unique_ptr<kernel::argument>(new local_argument);
Francisco Jerezbf89a972014-10-08 17:39:35 +0300380
Jan Vesely3a18fc62014-12-05 19:05:30 -0500381 case module::argument::constant:
382 return std::unique_ptr<kernel::argument>(new constant_argument);
Francisco Jerezbf89a972014-10-08 17:39:35 +0300383
Jan Vesely3a18fc62014-12-05 19:05:30 -0500384 case module::argument::image2d_rd:
385 case module::argument::image3d_rd:
386 return std::unique_ptr<kernel::argument>(new image_rd_argument);
Francisco Jerezbf89a972014-10-08 17:39:35 +0300387
Jan Vesely3a18fc62014-12-05 19:05:30 -0500388 case module::argument::image2d_wr:
389 case module::argument::image3d_wr:
390 return std::unique_ptr<kernel::argument>(new image_wr_argument);
Francisco Jerezbf89a972014-10-08 17:39:35 +0300391
Jan Vesely3a18fc62014-12-05 19:05:30 -0500392 case module::argument::sampler:
393 return std::unique_ptr<kernel::argument>(new sampler_argument);
Francisco Jerezbf89a972014-10-08 17:39:35 +0300394
Jan Vesely3a18fc62014-12-05 19:05:30 -0500395 }
396 throw error(CL_INVALID_KERNEL_DEFINITION);
Francisco Jerezbf89a972014-10-08 17:39:35 +0300397}
398
Francisco Jerez35307f52013-09-17 23:20:11 -0700399kernel::argument::argument() : _set(false) {
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200400}
401
402bool
Francisco Jerez35307f52013-09-17 23:20:11 -0700403kernel::argument::set() const {
Francisco Jerez8e14b822013-09-17 23:13:48 -0700404 return _set;
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200405}
406
407size_t
Francisco Jerez35307f52013-09-17 23:20:11 -0700408kernel::argument::storage() const {
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200409 return 0;
410}
411
Francisco Jerez35307f52013-09-17 23:20:11 -0700412kernel::scalar_argument::scalar_argument(size_t size) : size(size) {
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200413}
414
415void
Francisco Jerez35307f52013-09-17 23:20:11 -0700416kernel::scalar_argument::set(size_t size, const void *value) {
Zoltan Gilianbe3622d2015-07-30 23:35:09 +0200417 if (!value)
418 throw error(CL_INVALID_ARG_VALUE);
419
Francisco Jereza3dcab42013-07-21 00:43:18 +0200420 if (size != this->size)
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200421 throw error(CL_INVALID_ARG_SIZE);
422
423 v = { (uint8_t *)value, (uint8_t *)value + size };
Francisco Jerez8e14b822013-09-17 23:13:48 -0700424 _set = true;
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200425}
426
427void
Francisco Jerez35307f52013-09-17 23:20:11 -0700428kernel::scalar_argument::bind(exec_context &ctx,
429 const module::argument &marg) {
Francisco Jerez829caf42013-07-21 01:06:13 +0200430 auto w = v;
431
Francisco Jerezf64c0ca2013-07-22 23:08:46 +0200432 extend(w, marg.ext_type, marg.target_size);
Francisco Jerezc4578d22014-02-18 15:07:11 +0100433 byteswap(w, ctx.q->device().endianness());
Francisco Jerezdf530822013-07-21 01:14:54 +0200434 align(ctx.input, marg.target_align);
Francisco Jerez829caf42013-07-21 01:06:13 +0200435 insert(ctx.input, w);
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200436}
437
438void
Francisco Jerez35307f52013-09-17 23:20:11 -0700439kernel::scalar_argument::unbind(exec_context &ctx) {
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200440}
441
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200442void
Francisco Jerez35307f52013-09-17 23:20:11 -0700443kernel::global_argument::set(size_t size, const void *value) {
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200444 if (size != sizeof(cl_mem))
445 throw error(CL_INVALID_ARG_SIZE);
446
Jan Vesely6ec21092014-01-16 20:22:14 -0500447 buf = pobj<buffer>(value ? *(cl_mem *)value : NULL);
Karol Herbstd6754eb2018-03-12 11:04:53 +0100448 svm = nullptr;
449 _set = true;
450}
451
452void
453kernel::global_argument::set_svm(const void *value) {
454 svm = value;
455 buf = nullptr;
Francisco Jerez8e14b822013-09-17 23:13:48 -0700456 _set = true;
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200457}
458
459void
Francisco Jerez35307f52013-09-17 23:20:11 -0700460kernel::global_argument::bind(exec_context &ctx,
461 const module::argument &marg) {
Francisco Jerezdf530822013-07-21 01:14:54 +0200462 align(ctx.input, marg.target_align);
Jan Vesely6ec21092014-01-16 20:22:14 -0500463
464 if (buf) {
Serge Martinc0f03f62020-05-09 08:11:16 +0200465 const resource &r = buf->resource_in(*ctx.q);
Tom Stellard945d87f2014-02-13 14:46:25 -0800466 ctx.g_handles.push_back(ctx.input.size());
467 ctx.g_buffers.push_back(r.pipe);
468
469 // How to handle multi-demensional offsets?
470 // We don't need to. Buffer offsets are always
471 // one-dimensional.
472 auto v = bytes(r.offset[0]);
473 extend(v, marg.ext_type, marg.target_size);
474 byteswap(v, ctx.q->device().endianness());
475 insert(ctx.input, v);
Karol Herbstd6754eb2018-03-12 11:04:53 +0100476 } else if (svm) {
477 auto v = bytes(svm);
478 extend(v, marg.ext_type, marg.target_size);
479 byteswap(v, ctx.q->device().endianness());
480 insert(ctx.input, v);
Jan Vesely6ec21092014-01-16 20:22:14 -0500481 } else {
482 // Null pointer.
483 allocate(ctx.input, marg.target_size);
484 }
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200485}
486
487void
Francisco Jerez35307f52013-09-17 23:20:11 -0700488kernel::global_argument::unbind(exec_context &ctx) {
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200489}
490
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200491size_t
Francisco Jerez35307f52013-09-17 23:20:11 -0700492kernel::local_argument::storage() const {
Francisco Jerez8e14b822013-09-17 23:13:48 -0700493 return _storage;
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200494}
495
496void
Francisco Jerez35307f52013-09-17 23:20:11 -0700497kernel::local_argument::set(size_t size, const void *value) {
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200498 if (value)
499 throw error(CL_INVALID_ARG_VALUE);
500
Zoltan Gilianbe3622d2015-07-30 23:35:09 +0200501 if (!size)
502 throw error(CL_INVALID_ARG_SIZE);
503
Francisco Jerez8e14b822013-09-17 23:13:48 -0700504 _storage = size;
505 _set = true;
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200506}
507
508void
Francisco Jerez35307f52013-09-17 23:20:11 -0700509kernel::local_argument::bind(exec_context &ctx,
510 const module::argument &marg) {
Francisco Jerez829caf42013-07-21 01:06:13 +0200511 auto v = bytes(ctx.mem_local);
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200512
Francisco Jerezf64c0ca2013-07-22 23:08:46 +0200513 extend(v, module::argument::zero_ext, marg.target_size);
Francisco Jerezc4578d22014-02-18 15:07:11 +0100514 byteswap(v, ctx.q->device().endianness());
Francisco Jerezdf530822013-07-21 01:14:54 +0200515 align(ctx.input, marg.target_align);
Francisco Jerez829caf42013-07-21 01:06:13 +0200516 insert(ctx.input, v);
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200517
Francisco Jerez8e14b822013-09-17 23:13:48 -0700518 ctx.mem_local += _storage;
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200519}
520
521void
Francisco Jerez35307f52013-09-17 23:20:11 -0700522kernel::local_argument::unbind(exec_context &ctx) {
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200523}
524
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200525void
Francisco Jerez35307f52013-09-17 23:20:11 -0700526kernel::constant_argument::set(size_t size, const void *value) {
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200527 if (size != sizeof(cl_mem))
528 throw error(CL_INVALID_ARG_SIZE);
529
Jan Vesely6ec21092014-01-16 20:22:14 -0500530 buf = pobj<buffer>(value ? *(cl_mem *)value : NULL);
Francisco Jerez8e14b822013-09-17 23:13:48 -0700531 _set = true;
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200532}
533
534void
Francisco Jerez35307f52013-09-17 23:20:11 -0700535kernel::constant_argument::bind(exec_context &ctx,
536 const module::argument &marg) {
Francisco Jerezdf530822013-07-21 01:14:54 +0200537 align(ctx.input, marg.target_align);
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200538
Jan Vesely6ec21092014-01-16 20:22:14 -0500539 if (buf) {
Serge Martinc0f03f62020-05-09 08:11:16 +0200540 resource &r = buf->resource_in(*ctx.q);
Tom Stellard945d87f2014-02-13 14:46:25 -0800541 auto v = bytes(ctx.resources.size() << 24 | r.offset[0]);
Jan Vesely6ec21092014-01-16 20:22:14 -0500542
543 extend(v, module::argument::zero_ext, marg.target_size);
Francisco Jerezc4578d22014-02-18 15:07:11 +0100544 byteswap(v, ctx.q->device().endianness());
Jan Vesely6ec21092014-01-16 20:22:14 -0500545 insert(ctx.input, v);
546
Tom Stellard945d87f2014-02-13 14:46:25 -0800547 st = r.bind_surface(*ctx.q, false);
Jan Vesely6ec21092014-01-16 20:22:14 -0500548 ctx.resources.push_back(st);
549 } else {
550 // Null pointer.
551 allocate(ctx.input, marg.target_size);
552 }
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200553}
554
555void
Francisco Jerez35307f52013-09-17 23:20:11 -0700556kernel::constant_argument::unbind(exec_context &ctx) {
Francisco Jerez198cd132014-02-16 19:35:11 +0100557 if (buf)
Serge Martinc0f03f62020-05-09 08:11:16 +0200558 buf->resource_in(*ctx.q).unbind_surface(*ctx.q, st);
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200559}
560
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200561void
Francisco Jerez35307f52013-09-17 23:20:11 -0700562kernel::image_rd_argument::set(size_t size, const void *value) {
Zoltan Gilianbe3622d2015-07-30 23:35:09 +0200563 if (!value)
564 throw error(CL_INVALID_ARG_VALUE);
565
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200566 if (size != sizeof(cl_mem))
567 throw error(CL_INVALID_ARG_SIZE);
568
Francisco Jerezd6f7afc2013-10-01 12:00:51 -0700569 img = &obj<image>(*(cl_mem *)value);
Francisco Jerez8e14b822013-09-17 23:13:48 -0700570 _set = true;
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200571}
572
573void
Francisco Jerez35307f52013-09-17 23:20:11 -0700574kernel::image_rd_argument::bind(exec_context &ctx,
575 const module::argument &marg) {
Francisco Jerez829caf42013-07-21 01:06:13 +0200576 auto v = bytes(ctx.sviews.size());
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200577
Francisco Jerezf64c0ca2013-07-22 23:08:46 +0200578 extend(v, module::argument::zero_ext, marg.target_size);
Francisco Jerezc4578d22014-02-18 15:07:11 +0100579 byteswap(v, ctx.q->device().endianness());
Francisco Jerezdf530822013-07-21 01:14:54 +0200580 align(ctx.input, marg.target_align);
Francisco Jerez829caf42013-07-21 01:06:13 +0200581 insert(ctx.input, v);
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200582
Serge Martinc0f03f62020-05-09 08:11:16 +0200583 st = img->resource_in(*ctx.q).bind_sampler_view(*ctx.q);
Francisco Jerez829caf42013-07-21 01:06:13 +0200584 ctx.sviews.push_back(st);
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200585}
586
587void
Francisco Jerez35307f52013-09-17 23:20:11 -0700588kernel::image_rd_argument::unbind(exec_context &ctx) {
Serge Martinc0f03f62020-05-09 08:11:16 +0200589 img->resource_in(*ctx.q).unbind_sampler_view(*ctx.q, st);
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200590}
591
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200592void
Francisco Jerez35307f52013-09-17 23:20:11 -0700593kernel::image_wr_argument::set(size_t size, const void *value) {
Zoltan Gilianbe3622d2015-07-30 23:35:09 +0200594 if (!value)
595 throw error(CL_INVALID_ARG_VALUE);
596
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200597 if (size != sizeof(cl_mem))
598 throw error(CL_INVALID_ARG_SIZE);
599
Francisco Jerezd6f7afc2013-10-01 12:00:51 -0700600 img = &obj<image>(*(cl_mem *)value);
Francisco Jerez8e14b822013-09-17 23:13:48 -0700601 _set = true;
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200602}
603
604void
Francisco Jerez35307f52013-09-17 23:20:11 -0700605kernel::image_wr_argument::bind(exec_context &ctx,
606 const module::argument &marg) {
Karol Herbst3aead712020-10-07 23:08:43 +0200607 auto v = bytes(ctx.iviews.size());
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200608
Francisco Jerezf64c0ca2013-07-22 23:08:46 +0200609 extend(v, module::argument::zero_ext, marg.target_size);
Francisco Jerezc4578d22014-02-18 15:07:11 +0100610 byteswap(v, ctx.q->device().endianness());
Francisco Jerezdf530822013-07-21 01:14:54 +0200611 align(ctx.input, marg.target_align);
Francisco Jerez829caf42013-07-21 01:06:13 +0200612 insert(ctx.input, v);
Karol Herbst3aead712020-10-07 23:08:43 +0200613 ctx.iviews.push_back(img->resource_in(*ctx.q).create_image_view(*ctx.q));
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200614}
615
616void
Francisco Jerez35307f52013-09-17 23:20:11 -0700617kernel::image_wr_argument::unbind(exec_context &ctx) {
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200618}
619
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200620void
Francisco Jerez35307f52013-09-17 23:20:11 -0700621kernel::sampler_argument::set(size_t size, const void *value) {
Zoltan Gilianbe3622d2015-07-30 23:35:09 +0200622 if (!value)
623 throw error(CL_INVALID_SAMPLER);
624
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200625 if (size != sizeof(cl_sampler))
626 throw error(CL_INVALID_ARG_SIZE);
627
Francisco Jerez04d0ab92013-09-15 22:20:43 -0700628 s = &obj(*(cl_sampler *)value);
Francisco Jerez8e14b822013-09-17 23:13:48 -0700629 _set = true;
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200630}
631
632void
Francisco Jerez35307f52013-09-17 23:20:11 -0700633kernel::sampler_argument::bind(exec_context &ctx,
634 const module::argument &marg) {
635 st = s->bind(*ctx.q);
Francisco Jerez829caf42013-07-21 01:06:13 +0200636 ctx.samplers.push_back(st);
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200637}
638
639void
Francisco Jerez35307f52013-09-17 23:20:11 -0700640kernel::sampler_argument::unbind(exec_context &ctx) {
641 s->unbind(*ctx.q, st);
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200642}