Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 1 | // |
| 2 | // Copyright 2012 Francisco Jerez |
| 3 | // |
| 4 | // Permission is hereby granted, free of charge, to any person obtaining a |
| 5 | // copy of this software and associated documentation files (the "Software"), |
| 6 | // to deal in the Software without restriction, including without limitation |
| 7 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| 8 | // and/or sell copies of the Software, and to permit persons to whom the |
| 9 | // Software is furnished to do so, subject to the following conditions: |
| 10 | // |
| 11 | // The above copyright notice and this permission notice shall be included in |
| 12 | // all copies or substantial portions of the Software. |
| 13 | // |
| 14 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 15 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 16 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
Kenneth Graunke | f0cb66b | 2013-04-21 13:52:08 -0700 | [diff] [blame] | 17 | // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
| 18 | // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
| 19 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
| 20 | // OTHER DEALINGS IN THE SOFTWARE. |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 21 | // |
| 22 | |
| 23 | #include "core/kernel.hpp" |
| 24 | #include "core/resource.hpp" |
Francisco Jerez | bf045bf | 2013-11-04 11:26:13 -0800 | [diff] [blame] | 25 | #include "util/factor.hpp" |
Francisco Jerez | df53082 | 2013-07-21 01:14:54 +0200 | [diff] [blame] | 26 | #include "util/u_math.h" |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 27 | #include "pipe/p_context.h" |
| 28 | |
| 29 | using namespace clover; |
| 30 | |
Francisco Jerez | c4578d2 | 2014-02-18 15:07:11 +0100 | [diff] [blame] | 31 | kernel::kernel(clover::program &prog, const std::string &name, |
Francisco Jerez | 35307f5 | 2013-09-17 23:20:11 -0700 | [diff] [blame] | 32 | const std::vector<module::argument> &margs) : |
Francisco Jerez | e9a4e74 | 2014-08-16 16:25:34 +0300 | [diff] [blame] | 33 | program(prog), _name(name), exec(*this), |
| 34 | program_ref(prog._kernel_ref_counter) { |
Francisco Jerez | 7a9bbff | 2013-09-16 21:50:40 -0700 | [diff] [blame] | 35 | for (auto &marg : margs) { |
Francisco Jerez | bf89a97 | 2014-10-08 17:39:35 +0300 | [diff] [blame] | 36 | if (marg.semantic == module::argument::general) |
| 37 | _args.emplace_back(argument::create(marg)); |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 38 | } |
Karol Herbst | adbfff6 | 2020-09-02 20:36:41 +0200 | [diff] [blame] | 39 | for (auto &dev : prog.devices()) { |
| 40 | auto &m = prog.build(dev).binary; |
| 41 | auto msym = find(name_equals(name), m.syms); |
| 42 | const auto f = id_type_equals(msym.section, module::section::data_constant); |
| 43 | if (!any_of(f, m.secs)) |
| 44 | continue; |
| 45 | |
| 46 | auto mconst = find(f, m.secs); |
| 47 | auto rb = std::make_unique<root_buffer>(prog.context(), |
| 48 | CL_MEM_COPY_HOST_PTR | CL_MEM_READ_ONLY, |
| 49 | mconst.size, mconst.data.data()); |
| 50 | _constant_buffers.emplace(&dev, std::move(rb)); |
| 51 | } |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 52 | } |
| 53 | |
Francisco Jerez | 7a9bbff | 2013-09-16 21:50:40 -0700 | [diff] [blame] | 54 | template<typename V> |
| 55 | static inline std::vector<uint> |
| 56 | pad_vector(command_queue &q, const V &v, uint x) { |
| 57 | std::vector<uint> w { v.begin(), v.end() }; |
Francisco Jerez | c4578d2 | 2014-02-18 15:07:11 +0100 | [diff] [blame] | 58 | w.resize(q.device().max_block_size().size(), x); |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 59 | return w; |
| 60 | } |
| 61 | |
| 62 | void |
Francisco Jerez | 35307f5 | 2013-09-17 23:20:11 -0700 | [diff] [blame] | 63 | kernel::launch(command_queue &q, |
| 64 | const std::vector<size_t> &grid_offset, |
| 65 | const std::vector<size_t> &grid_size, |
| 66 | const std::vector<size_t> &block_size) { |
Francisco Jerez | 1942490 | 2016-05-17 16:03:13 +0200 | [diff] [blame] | 67 | const auto m = program().build(q.device()).binary; |
Francisco Jerez | 7a9bbff | 2013-09-16 21:50:40 -0700 | [diff] [blame] | 68 | const auto reduced_grid_size = |
| 69 | map(divides(), grid_size, block_size); |
Francisco Jerez | bf89a97 | 2014-10-08 17:39:35 +0300 | [diff] [blame] | 70 | void *st = exec.bind(&q, grid_offset); |
Hans de Goede | 4d02e91 | 2016-03-14 15:01:05 +0100 | [diff] [blame] | 71 | struct pipe_grid_info info = {}; |
Francisco Jerez | 7a9bbff | 2013-09-16 21:50:40 -0700 | [diff] [blame] | 72 | |
| 73 | // The handles are created during exec_context::bind(), so we need make |
| 74 | // sure to call exec_context::bind() before retrieving them. |
Karol Herbst | 6e035c0 | 2020-03-10 22:41:26 +0000 | [diff] [blame] | 75 | std::vector<uint32_t *> g_handles = map([&](size_t h) { |
| 76 | return (uint32_t *)&exec.input[h]; |
Francisco Jerez | 7d61769 | 2013-10-06 13:49:05 -0700 | [diff] [blame] | 77 | }, exec.g_handles); |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 78 | |
| 79 | q.pipe->bind_compute_state(q.pipe, st); |
Brian Paul | 93e6694 | 2013-09-16 10:21:07 -0600 | [diff] [blame] | 80 | q.pipe->bind_sampler_states(q.pipe, PIPE_SHADER_COMPUTE, |
| 81 | 0, exec.samplers.size(), |
| 82 | exec.samplers.data()); |
| 83 | |
David Heidelberger | 2901e2e | 2013-10-23 21:25:12 +0200 | [diff] [blame] | 84 | q.pipe->set_sampler_views(q.pipe, PIPE_SHADER_COMPUTE, 0, |
| 85 | exec.sviews.size(), exec.sviews.data()); |
Karol Herbst | 3aead71 | 2020-10-07 23:08:43 +0200 | [diff] [blame] | 86 | q.pipe->set_shader_images(q.pipe, PIPE_SHADER_COMPUTE, 0, |
| 87 | exec.iviews.size(), exec.iviews.data()); |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 88 | q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(), |
Francisco Jerez | 35307f5 | 2013-09-17 23:20:11 -0700 | [diff] [blame] | 89 | exec.resources.data()); |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 90 | q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(), |
| 91 | exec.g_buffers.data(), g_handles.data()); |
| 92 | |
Samuel Pitoiset | bfd695e | 2016-01-12 18:00:00 +0100 | [diff] [blame] | 93 | // Fill information for the launch_grid() call. |
Hans de Goede | ef8e50a | 2016-04-22 14:47:05 +0200 | [diff] [blame] | 94 | info.work_dim = grid_size.size(); |
Serge Martin | a4cff18 | 2016-02-13 23:39:22 +0100 | [diff] [blame] | 95 | copy(pad_vector(q, block_size, 1), info.block); |
| 96 | copy(pad_vector(q, reduced_grid_size, 1), info.grid); |
| 97 | info.pc = find(name_equals(_name), m.syms).offset; |
Samuel Pitoiset | bfd695e | 2016-01-12 18:00:00 +0100 | [diff] [blame] | 98 | info.input = exec.input.data(); |
| 99 | |
| 100 | q.pipe->launch_grid(q.pipe, &info); |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 101 | |
| 102 | q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(), NULL, NULL); |
| 103 | q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(), NULL); |
Karol Herbst | 3aead71 | 2020-10-07 23:08:43 +0200 | [diff] [blame] | 104 | q.pipe->set_shader_images(q.pipe, PIPE_SHADER_COMPUTE, 0, |
| 105 | exec.iviews.size(), NULL); |
David Heidelberger | 2901e2e | 2013-10-23 21:25:12 +0200 | [diff] [blame] | 106 | q.pipe->set_sampler_views(q.pipe, PIPE_SHADER_COMPUTE, 0, |
| 107 | exec.sviews.size(), NULL); |
Brian Paul | 93e6694 | 2013-09-16 10:21:07 -0600 | [diff] [blame] | 108 | q.pipe->bind_sampler_states(q.pipe, PIPE_SHADER_COMPUTE, 0, |
| 109 | exec.samplers.size(), NULL); |
Bas Nieuwenhuizen | be5899d | 2016-03-24 23:11:03 +0100 | [diff] [blame] | 110 | |
| 111 | q.pipe->memory_barrier(q.pipe, PIPE_BARRIER_GLOBAL_BUFFER); |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 112 | exec.unbind(); |
| 113 | } |
| 114 | |
| 115 | size_t |
Francisco Jerez | 35307f5 | 2013-09-17 23:20:11 -0700 | [diff] [blame] | 116 | kernel::mem_local() const { |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 117 | size_t sz = 0; |
| 118 | |
Francisco Jerez | 7a9bbff | 2013-09-16 21:50:40 -0700 | [diff] [blame] | 119 | for (auto &arg : args()) { |
| 120 | if (dynamic_cast<local_argument *>(&arg)) |
| 121 | sz += arg.storage(); |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 122 | } |
| 123 | |
| 124 | return sz; |
| 125 | } |
| 126 | |
| 127 | size_t |
Francisco Jerez | 35307f5 | 2013-09-17 23:20:11 -0700 | [diff] [blame] | 128 | kernel::mem_private() const { |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 129 | return 0; |
| 130 | } |
| 131 | |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 132 | const std::string & |
Francisco Jerez | 35307f5 | 2013-09-17 23:20:11 -0700 | [diff] [blame] | 133 | kernel::name() const { |
Francisco Jerez | 8e14b82 | 2013-09-17 23:13:48 -0700 | [diff] [blame] | 134 | return _name; |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 135 | } |
| 136 | |
| 137 | std::vector<size_t> |
Francisco Jerez | bf045bf | 2013-11-04 11:26:13 -0800 | [diff] [blame] | 138 | kernel::optimal_block_size(const command_queue &q, |
| 139 | const std::vector<size_t> &grid_size) const { |
| 140 | return factor::find_grid_optimal_factor<size_t>( |
Francisco Jerez | c4578d2 | 2014-02-18 15:07:11 +0100 | [diff] [blame] | 141 | q.device().max_threads_per_block(), q.device().max_block_size(), |
Francisco Jerez | bf045bf | 2013-11-04 11:26:13 -0800 | [diff] [blame] | 142 | grid_size); |
| 143 | } |
| 144 | |
| 145 | std::vector<size_t> |
| 146 | kernel::required_block_size() const { |
Serge Martin | c04d5e7 | 2020-09-27 15:45:33 +0200 | [diff] [blame] | 147 | return find(name_equals(_name), program().symbols()).reqd_work_group_size; |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 148 | } |
| 149 | |
Francisco Jerez | 7a9bbff | 2013-09-16 21:50:40 -0700 | [diff] [blame] | 150 | kernel::argument_range |
| 151 | kernel::args() { |
| 152 | return map(derefs(), _args); |
| 153 | } |
| 154 | |
| 155 | kernel::const_argument_range |
| 156 | kernel::args() const { |
| 157 | return map(derefs(), _args); |
| 158 | } |
| 159 | |
Serge Martin | 9aea6e3 | 2020-05-09 23:13:48 +0200 | [diff] [blame] | 160 | std::vector<clover::module::arg_info> |
| 161 | kernel::args_infos() { |
| 162 | std::vector<clover::module::arg_info> infos; |
| 163 | for (auto &marg: find(name_equals(_name), program().symbols()).args) |
| 164 | if (marg.semantic == clover::module::argument::general) |
| 165 | infos.emplace_back(marg.info); |
| 166 | |
| 167 | return infos; |
| 168 | } |
| 169 | |
Francisco Jerez | 35307f5 | 2013-09-17 23:20:11 -0700 | [diff] [blame] | 170 | const module & |
| 171 | kernel::module(const command_queue &q) const { |
Francisco Jerez | 1942490 | 2016-05-17 16:03:13 +0200 | [diff] [blame] | 172 | return program().build(q.device()).binary; |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 173 | } |
| 174 | |
Francisco Jerez | 35307f5 | 2013-09-17 23:20:11 -0700 | [diff] [blame] | 175 | kernel::exec_context::exec_context(kernel &kern) : |
Francisco Jerez | 7a9bbff | 2013-09-16 21:50:40 -0700 | [diff] [blame] | 176 | kern(kern), q(NULL), mem_local(0), st(NULL), cs() { |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 177 | } |
| 178 | |
Francisco Jerez | 35307f5 | 2013-09-17 23:20:11 -0700 | [diff] [blame] | 179 | kernel::exec_context::~exec_context() { |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 180 | if (st) |
| 181 | q->pipe->delete_compute_state(q->pipe, st); |
| 182 | } |
| 183 | |
| 184 | void * |
Francisco Jerez | bf89a97 | 2014-10-08 17:39:35 +0300 | [diff] [blame] | 185 | kernel::exec_context::bind(intrusive_ptr<command_queue> _q, |
| 186 | const std::vector<size_t> &grid_offset) { |
Francisco Jerez | 8e14b82 | 2013-09-17 23:13:48 -0700 | [diff] [blame] | 187 | std::swap(q, _q); |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 188 | |
Francisco Jerez | a3dcab4 | 2013-07-21 00:43:18 +0200 | [diff] [blame] | 189 | // Bind kernel arguments. |
Francisco Jerez | 1942490 | 2016-05-17 16:03:13 +0200 | [diff] [blame] | 190 | auto &m = kern.program().build(q->device()).binary; |
Karol Herbst | c8cd8e2 | 2019-05-10 09:24:42 +0200 | [diff] [blame] | 191 | auto msym = find(name_equals(kern.name()), m.syms); |
| 192 | auto margs = msym.args; |
Karol Herbst | adbfff6 | 2020-09-02 20:36:41 +0200 | [diff] [blame] | 193 | auto msec = find(id_type_equals(msym.section, module::section::text_executable), m.secs); |
Francisco Jerez | bf89a97 | 2014-10-08 17:39:35 +0300 | [diff] [blame] | 194 | auto explicit_arg = kern._args.begin(); |
Francisco Jerez | 7a9bbff | 2013-09-16 21:50:40 -0700 | [diff] [blame] | 195 | |
Francisco Jerez | bf89a97 | 2014-10-08 17:39:35 +0300 | [diff] [blame] | 196 | for (auto &marg : margs) { |
| 197 | switch (marg.semantic) { |
| 198 | case module::argument::general: |
| 199 | (*(explicit_arg++))->bind(*this, marg); |
Francisco Jerez | 2286edc | 2014-10-12 11:32:48 +0300 | [diff] [blame] | 200 | break; |
Francisco Jerez | bf89a97 | 2014-10-08 17:39:35 +0300 | [diff] [blame] | 201 | |
| 202 | case module::argument::grid_dimension: { |
| 203 | const cl_uint dimension = grid_offset.size(); |
| 204 | auto arg = argument::create(marg); |
| 205 | |
| 206 | arg->set(sizeof(dimension), &dimension); |
| 207 | arg->bind(*this, marg); |
Francisco Jerez | 2286edc | 2014-10-12 11:32:48 +0300 | [diff] [blame] | 208 | break; |
Francisco Jerez | bf89a97 | 2014-10-08 17:39:35 +0300 | [diff] [blame] | 209 | } |
| 210 | case module::argument::grid_offset: { |
Jan Vesely | 40c6d54 | 2016-05-15 20:08:09 -0400 | [diff] [blame] | 211 | for (cl_uint x : pad_vector(*q, grid_offset, 0)) { |
Francisco Jerez | bf89a97 | 2014-10-08 17:39:35 +0300 | [diff] [blame] | 212 | auto arg = argument::create(marg); |
| 213 | |
| 214 | arg->set(sizeof(x), &x); |
| 215 | arg->bind(*this, marg); |
| 216 | } |
Francisco Jerez | 2286edc | 2014-10-12 11:32:48 +0300 | [diff] [blame] | 217 | break; |
Francisco Jerez | bf89a97 | 2014-10-08 17:39:35 +0300 | [diff] [blame] | 218 | } |
Zoltan Gilian | 9ef5b7a | 2015-07-27 11:34:07 +0200 | [diff] [blame] | 219 | case module::argument::image_size: { |
| 220 | auto img = dynamic_cast<image_argument &>(**(explicit_arg - 1)).get(); |
| 221 | std::vector<cl_uint> image_size{ |
| 222 | static_cast<cl_uint>(img->width()), |
| 223 | static_cast<cl_uint>(img->height()), |
| 224 | static_cast<cl_uint>(img->depth())}; |
| 225 | for (auto x : image_size) { |
| 226 | auto arg = argument::create(marg); |
| 227 | |
| 228 | arg->set(sizeof(x), &x); |
| 229 | arg->bind(*this, marg); |
| 230 | } |
| 231 | break; |
| 232 | } |
| 233 | case module::argument::image_format: { |
| 234 | auto img = dynamic_cast<image_argument &>(**(explicit_arg - 1)).get(); |
| 235 | cl_image_format fmt = img->format(); |
| 236 | std::vector<cl_uint> image_format{ |
| 237 | static_cast<cl_uint>(fmt.image_channel_data_type), |
| 238 | static_cast<cl_uint>(fmt.image_channel_order)}; |
| 239 | for (auto x : image_format) { |
| 240 | auto arg = argument::create(marg); |
| 241 | |
| 242 | arg->set(sizeof(x), &x); |
| 243 | arg->bind(*this, marg); |
| 244 | } |
| 245 | break; |
| 246 | } |
Karol Herbst | adbfff6 | 2020-09-02 20:36:41 +0200 | [diff] [blame] | 247 | case module::argument::constant_buffer: { |
| 248 | auto arg = argument::create(marg); |
| 249 | cl_mem buf = kern._constant_buffers.at(&q->device()).get(); |
| 250 | arg->set(q->device().address_bits() / 8, &buf); |
| 251 | arg->bind(*this, marg); |
| 252 | break; |
| 253 | } |
Francisco Jerez | bf89a97 | 2014-10-08 17:39:35 +0300 | [diff] [blame] | 254 | } |
| 255 | } |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 256 | |
| 257 | // Create a new compute state if anything changed. |
Francisco Jerez | 8e14b82 | 2013-09-17 23:13:48 -0700 | [diff] [blame] | 258 | if (!st || q != _q || |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 259 | cs.req_local_mem != mem_local || |
| 260 | cs.req_input_mem != input.size()) { |
| 261 | if (st) |
Francisco Jerez | 8e14b82 | 2013-09-17 23:13:48 -0700 | [diff] [blame] | 262 | _q->pipe->delete_compute_state(_q->pipe, st); |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 263 | |
Bas Nieuwenhuizen | ea8f4a6 | 2016-03-17 14:15:39 +0100 | [diff] [blame] | 264 | cs.ir_type = q->device().ir_format(); |
EdB | d8f817a | 2015-04-23 20:13:51 +0200 | [diff] [blame] | 265 | cs.prog = &(msec.data[0]); |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 266 | cs.req_local_mem = mem_local; |
| 267 | cs.req_input_mem = input.size(); |
| 268 | st = q->pipe->create_compute_state(q->pipe, &cs); |
Jan Vesely | 154fbd0 | 2018-07-17 02:07:45 -0400 | [diff] [blame] | 269 | if (!st) { |
Pierre Moreau | 1c9fdce | 2019-02-02 15:33:51 +0100 | [diff] [blame] | 270 | unbind(); // Cleanup |
| 271 | throw error(CL_OUT_OF_RESOURCES); |
Jan Vesely | 154fbd0 | 2018-07-17 02:07:45 -0400 | [diff] [blame] | 272 | } |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 273 | } |
| 274 | |
| 275 | return st; |
| 276 | } |
| 277 | |
| 278 | void |
Francisco Jerez | 35307f5 | 2013-09-17 23:20:11 -0700 | [diff] [blame] | 279 | kernel::exec_context::unbind() { |
Francisco Jerez | 7a9bbff | 2013-09-16 21:50:40 -0700 | [diff] [blame] | 280 | for (auto &arg : kern.args()) |
| 281 | arg.unbind(*this); |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 282 | |
| 283 | input.clear(); |
| 284 | samplers.clear(); |
| 285 | sviews.clear(); |
Karol Herbst | 3aead71 | 2020-10-07 23:08:43 +0200 | [diff] [blame] | 286 | iviews.clear(); |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 287 | resources.clear(); |
| 288 | g_buffers.clear(); |
| 289 | g_handles.clear(); |
| 290 | mem_local = 0; |
| 291 | } |
| 292 | |
Francisco Jerez | 829caf4 | 2013-07-21 01:06:13 +0200 | [diff] [blame] | 293 | namespace { |
| 294 | template<typename T> |
| 295 | std::vector<uint8_t> |
| 296 | bytes(const T& x) { |
| 297 | return { (uint8_t *)&x, (uint8_t *)&x + sizeof(x) }; |
| 298 | } |
| 299 | |
| 300 | /// |
| 301 | /// Transform buffer \a v from the native byte order into the byte |
| 302 | /// order specified by \a e. |
| 303 | /// |
| 304 | template<typename T> |
| 305 | void |
| 306 | byteswap(T &v, pipe_endian e) { |
| 307 | if (PIPE_ENDIAN_NATIVE != e) |
| 308 | std::reverse(v.begin(), v.end()); |
| 309 | } |
| 310 | |
Francisco Jerez | df53082 | 2013-07-21 01:14:54 +0200 | [diff] [blame] | 311 | /// |
| 312 | /// Pad buffer \a v to the next multiple of \a n. |
| 313 | /// |
| 314 | template<typename T> |
| 315 | void |
| 316 | align(T &v, size_t n) { |
| 317 | v.resize(util_align_npot(v.size(), n)); |
| 318 | } |
| 319 | |
Francisco Jerez | f64c0ca | 2013-07-22 23:08:46 +0200 | [diff] [blame] | 320 | bool |
| 321 | msb(const std::vector<uint8_t> &s) { |
| 322 | if (PIPE_ENDIAN_NATIVE == PIPE_ENDIAN_LITTLE) |
| 323 | return s.back() & 0x80; |
| 324 | else |
| 325 | return s.front() & 0x80; |
| 326 | } |
| 327 | |
| 328 | /// |
| 329 | /// Resize buffer \a v to size \a n using sign or zero extension |
| 330 | /// according to \a ext. |
| 331 | /// |
| 332 | template<typename T> |
| 333 | void |
Francisco Jerez | 35307f5 | 2013-09-17 23:20:11 -0700 | [diff] [blame] | 334 | extend(T &v, enum module::argument::ext_type ext, size_t n) { |
Francisco Jerez | f64c0ca | 2013-07-22 23:08:46 +0200 | [diff] [blame] | 335 | const size_t m = std::min(v.size(), n); |
| 336 | const bool sign_ext = (ext == module::argument::sign_ext); |
| 337 | const uint8_t fill = (sign_ext && msb(v) ? ~0 : 0); |
| 338 | T w(n, fill); |
| 339 | |
| 340 | if (PIPE_ENDIAN_NATIVE == PIPE_ENDIAN_LITTLE) |
| 341 | std::copy_n(v.begin(), m, w.begin()); |
| 342 | else |
| 343 | std::copy_n(v.end() - m, m, w.end() - m); |
| 344 | |
| 345 | std::swap(v, w); |
| 346 | } |
| 347 | |
Francisco Jerez | 829caf4 | 2013-07-21 01:06:13 +0200 | [diff] [blame] | 348 | /// |
| 349 | /// Append buffer \a w to \a v. |
| 350 | /// |
| 351 | template<typename T> |
| 352 | void |
| 353 | insert(T &v, const T &w) { |
| 354 | v.insert(v.end(), w.begin(), w.end()); |
| 355 | } |
| 356 | |
| 357 | /// |
| 358 | /// Append \a n elements to the end of buffer \a v. |
| 359 | /// |
| 360 | template<typename T> |
| 361 | size_t |
| 362 | allocate(T &v, size_t n) { |
| 363 | size_t pos = v.size(); |
| 364 | v.resize(pos + n); |
| 365 | return pos; |
| 366 | } |
| 367 | } |
| 368 | |
Francisco Jerez | bf89a97 | 2014-10-08 17:39:35 +0300 | [diff] [blame] | 369 | std::unique_ptr<kernel::argument> |
| 370 | kernel::argument::create(const module::argument &marg) { |
Jan Vesely | 3a18fc6 | 2014-12-05 19:05:30 -0500 | [diff] [blame] | 371 | switch (marg.type) { |
| 372 | case module::argument::scalar: |
| 373 | return std::unique_ptr<kernel::argument>(new scalar_argument(marg.size)); |
Francisco Jerez | bf89a97 | 2014-10-08 17:39:35 +0300 | [diff] [blame] | 374 | |
Jan Vesely | 3a18fc6 | 2014-12-05 19:05:30 -0500 | [diff] [blame] | 375 | case module::argument::global: |
| 376 | return std::unique_ptr<kernel::argument>(new global_argument); |
Francisco Jerez | bf89a97 | 2014-10-08 17:39:35 +0300 | [diff] [blame] | 377 | |
Jan Vesely | 3a18fc6 | 2014-12-05 19:05:30 -0500 | [diff] [blame] | 378 | case module::argument::local: |
| 379 | return std::unique_ptr<kernel::argument>(new local_argument); |
Francisco Jerez | bf89a97 | 2014-10-08 17:39:35 +0300 | [diff] [blame] | 380 | |
Jan Vesely | 3a18fc6 | 2014-12-05 19:05:30 -0500 | [diff] [blame] | 381 | case module::argument::constant: |
| 382 | return std::unique_ptr<kernel::argument>(new constant_argument); |
Francisco Jerez | bf89a97 | 2014-10-08 17:39:35 +0300 | [diff] [blame] | 383 | |
Jan Vesely | 3a18fc6 | 2014-12-05 19:05:30 -0500 | [diff] [blame] | 384 | case module::argument::image2d_rd: |
| 385 | case module::argument::image3d_rd: |
| 386 | return std::unique_ptr<kernel::argument>(new image_rd_argument); |
Francisco Jerez | bf89a97 | 2014-10-08 17:39:35 +0300 | [diff] [blame] | 387 | |
Jan Vesely | 3a18fc6 | 2014-12-05 19:05:30 -0500 | [diff] [blame] | 388 | case module::argument::image2d_wr: |
| 389 | case module::argument::image3d_wr: |
| 390 | return std::unique_ptr<kernel::argument>(new image_wr_argument); |
Francisco Jerez | bf89a97 | 2014-10-08 17:39:35 +0300 | [diff] [blame] | 391 | |
Jan Vesely | 3a18fc6 | 2014-12-05 19:05:30 -0500 | [diff] [blame] | 392 | case module::argument::sampler: |
| 393 | return std::unique_ptr<kernel::argument>(new sampler_argument); |
Francisco Jerez | bf89a97 | 2014-10-08 17:39:35 +0300 | [diff] [blame] | 394 | |
Jan Vesely | 3a18fc6 | 2014-12-05 19:05:30 -0500 | [diff] [blame] | 395 | } |
| 396 | throw error(CL_INVALID_KERNEL_DEFINITION); |
Francisco Jerez | bf89a97 | 2014-10-08 17:39:35 +0300 | [diff] [blame] | 397 | } |
| 398 | |
Francisco Jerez | 35307f5 | 2013-09-17 23:20:11 -0700 | [diff] [blame] | 399 | kernel::argument::argument() : _set(false) { |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 400 | } |
| 401 | |
| 402 | bool |
Francisco Jerez | 35307f5 | 2013-09-17 23:20:11 -0700 | [diff] [blame] | 403 | kernel::argument::set() const { |
Francisco Jerez | 8e14b82 | 2013-09-17 23:13:48 -0700 | [diff] [blame] | 404 | return _set; |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 405 | } |
| 406 | |
| 407 | size_t |
Francisco Jerez | 35307f5 | 2013-09-17 23:20:11 -0700 | [diff] [blame] | 408 | kernel::argument::storage() const { |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 409 | return 0; |
| 410 | } |
| 411 | |
Francisco Jerez | 35307f5 | 2013-09-17 23:20:11 -0700 | [diff] [blame] | 412 | kernel::scalar_argument::scalar_argument(size_t size) : size(size) { |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 413 | } |
| 414 | |
| 415 | void |
Francisco Jerez | 35307f5 | 2013-09-17 23:20:11 -0700 | [diff] [blame] | 416 | kernel::scalar_argument::set(size_t size, const void *value) { |
Zoltan Gilian | be3622d | 2015-07-30 23:35:09 +0200 | [diff] [blame] | 417 | if (!value) |
| 418 | throw error(CL_INVALID_ARG_VALUE); |
| 419 | |
Francisco Jerez | a3dcab4 | 2013-07-21 00:43:18 +0200 | [diff] [blame] | 420 | if (size != this->size) |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 421 | throw error(CL_INVALID_ARG_SIZE); |
| 422 | |
| 423 | v = { (uint8_t *)value, (uint8_t *)value + size }; |
Francisco Jerez | 8e14b82 | 2013-09-17 23:13:48 -0700 | [diff] [blame] | 424 | _set = true; |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 425 | } |
| 426 | |
| 427 | void |
Francisco Jerez | 35307f5 | 2013-09-17 23:20:11 -0700 | [diff] [blame] | 428 | kernel::scalar_argument::bind(exec_context &ctx, |
| 429 | const module::argument &marg) { |
Francisco Jerez | 829caf4 | 2013-07-21 01:06:13 +0200 | [diff] [blame] | 430 | auto w = v; |
| 431 | |
Francisco Jerez | f64c0ca | 2013-07-22 23:08:46 +0200 | [diff] [blame] | 432 | extend(w, marg.ext_type, marg.target_size); |
Francisco Jerez | c4578d2 | 2014-02-18 15:07:11 +0100 | [diff] [blame] | 433 | byteswap(w, ctx.q->device().endianness()); |
Francisco Jerez | df53082 | 2013-07-21 01:14:54 +0200 | [diff] [blame] | 434 | align(ctx.input, marg.target_align); |
Francisco Jerez | 829caf4 | 2013-07-21 01:06:13 +0200 | [diff] [blame] | 435 | insert(ctx.input, w); |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 436 | } |
| 437 | |
| 438 | void |
Francisco Jerez | 35307f5 | 2013-09-17 23:20:11 -0700 | [diff] [blame] | 439 | kernel::scalar_argument::unbind(exec_context &ctx) { |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 440 | } |
| 441 | |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 442 | void |
Francisco Jerez | 35307f5 | 2013-09-17 23:20:11 -0700 | [diff] [blame] | 443 | kernel::global_argument::set(size_t size, const void *value) { |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 444 | if (size != sizeof(cl_mem)) |
| 445 | throw error(CL_INVALID_ARG_SIZE); |
| 446 | |
Jan Vesely | 6ec2109 | 2014-01-16 20:22:14 -0500 | [diff] [blame] | 447 | buf = pobj<buffer>(value ? *(cl_mem *)value : NULL); |
Karol Herbst | d6754eb | 2018-03-12 11:04:53 +0100 | [diff] [blame] | 448 | svm = nullptr; |
| 449 | _set = true; |
| 450 | } |
| 451 | |
| 452 | void |
| 453 | kernel::global_argument::set_svm(const void *value) { |
| 454 | svm = value; |
| 455 | buf = nullptr; |
Francisco Jerez | 8e14b82 | 2013-09-17 23:13:48 -0700 | [diff] [blame] | 456 | _set = true; |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 457 | } |
| 458 | |
| 459 | void |
Francisco Jerez | 35307f5 | 2013-09-17 23:20:11 -0700 | [diff] [blame] | 460 | kernel::global_argument::bind(exec_context &ctx, |
| 461 | const module::argument &marg) { |
Francisco Jerez | df53082 | 2013-07-21 01:14:54 +0200 | [diff] [blame] | 462 | align(ctx.input, marg.target_align); |
Jan Vesely | 6ec2109 | 2014-01-16 20:22:14 -0500 | [diff] [blame] | 463 | |
| 464 | if (buf) { |
Serge Martin | c0f03f6 | 2020-05-09 08:11:16 +0200 | [diff] [blame] | 465 | const resource &r = buf->resource_in(*ctx.q); |
Tom Stellard | 945d87f | 2014-02-13 14:46:25 -0800 | [diff] [blame] | 466 | ctx.g_handles.push_back(ctx.input.size()); |
| 467 | ctx.g_buffers.push_back(r.pipe); |
| 468 | |
| 469 | // How to handle multi-demensional offsets? |
| 470 | // We don't need to. Buffer offsets are always |
| 471 | // one-dimensional. |
| 472 | auto v = bytes(r.offset[0]); |
| 473 | extend(v, marg.ext_type, marg.target_size); |
| 474 | byteswap(v, ctx.q->device().endianness()); |
| 475 | insert(ctx.input, v); |
Karol Herbst | d6754eb | 2018-03-12 11:04:53 +0100 | [diff] [blame] | 476 | } else if (svm) { |
| 477 | auto v = bytes(svm); |
| 478 | extend(v, marg.ext_type, marg.target_size); |
| 479 | byteswap(v, ctx.q->device().endianness()); |
| 480 | insert(ctx.input, v); |
Jan Vesely | 6ec2109 | 2014-01-16 20:22:14 -0500 | [diff] [blame] | 481 | } else { |
| 482 | // Null pointer. |
| 483 | allocate(ctx.input, marg.target_size); |
| 484 | } |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 485 | } |
| 486 | |
| 487 | void |
Francisco Jerez | 35307f5 | 2013-09-17 23:20:11 -0700 | [diff] [blame] | 488 | kernel::global_argument::unbind(exec_context &ctx) { |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 489 | } |
| 490 | |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 491 | size_t |
Francisco Jerez | 35307f5 | 2013-09-17 23:20:11 -0700 | [diff] [blame] | 492 | kernel::local_argument::storage() const { |
Francisco Jerez | 8e14b82 | 2013-09-17 23:13:48 -0700 | [diff] [blame] | 493 | return _storage; |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 494 | } |
| 495 | |
| 496 | void |
Francisco Jerez | 35307f5 | 2013-09-17 23:20:11 -0700 | [diff] [blame] | 497 | kernel::local_argument::set(size_t size, const void *value) { |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 498 | if (value) |
| 499 | throw error(CL_INVALID_ARG_VALUE); |
| 500 | |
Zoltan Gilian | be3622d | 2015-07-30 23:35:09 +0200 | [diff] [blame] | 501 | if (!size) |
| 502 | throw error(CL_INVALID_ARG_SIZE); |
| 503 | |
Francisco Jerez | 8e14b82 | 2013-09-17 23:13:48 -0700 | [diff] [blame] | 504 | _storage = size; |
| 505 | _set = true; |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 506 | } |
| 507 | |
| 508 | void |
Francisco Jerez | 35307f5 | 2013-09-17 23:20:11 -0700 | [diff] [blame] | 509 | kernel::local_argument::bind(exec_context &ctx, |
| 510 | const module::argument &marg) { |
Francisco Jerez | 829caf4 | 2013-07-21 01:06:13 +0200 | [diff] [blame] | 511 | auto v = bytes(ctx.mem_local); |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 512 | |
Francisco Jerez | f64c0ca | 2013-07-22 23:08:46 +0200 | [diff] [blame] | 513 | extend(v, module::argument::zero_ext, marg.target_size); |
Francisco Jerez | c4578d2 | 2014-02-18 15:07:11 +0100 | [diff] [blame] | 514 | byteswap(v, ctx.q->device().endianness()); |
Francisco Jerez | df53082 | 2013-07-21 01:14:54 +0200 | [diff] [blame] | 515 | align(ctx.input, marg.target_align); |
Francisco Jerez | 829caf4 | 2013-07-21 01:06:13 +0200 | [diff] [blame] | 516 | insert(ctx.input, v); |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 517 | |
Francisco Jerez | 8e14b82 | 2013-09-17 23:13:48 -0700 | [diff] [blame] | 518 | ctx.mem_local += _storage; |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 519 | } |
| 520 | |
| 521 | void |
Francisco Jerez | 35307f5 | 2013-09-17 23:20:11 -0700 | [diff] [blame] | 522 | kernel::local_argument::unbind(exec_context &ctx) { |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 523 | } |
| 524 | |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 525 | void |
Francisco Jerez | 35307f5 | 2013-09-17 23:20:11 -0700 | [diff] [blame] | 526 | kernel::constant_argument::set(size_t size, const void *value) { |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 527 | if (size != sizeof(cl_mem)) |
| 528 | throw error(CL_INVALID_ARG_SIZE); |
| 529 | |
Jan Vesely | 6ec2109 | 2014-01-16 20:22:14 -0500 | [diff] [blame] | 530 | buf = pobj<buffer>(value ? *(cl_mem *)value : NULL); |
Francisco Jerez | 8e14b82 | 2013-09-17 23:13:48 -0700 | [diff] [blame] | 531 | _set = true; |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 532 | } |
| 533 | |
| 534 | void |
Francisco Jerez | 35307f5 | 2013-09-17 23:20:11 -0700 | [diff] [blame] | 535 | kernel::constant_argument::bind(exec_context &ctx, |
| 536 | const module::argument &marg) { |
Francisco Jerez | df53082 | 2013-07-21 01:14:54 +0200 | [diff] [blame] | 537 | align(ctx.input, marg.target_align); |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 538 | |
Jan Vesely | 6ec2109 | 2014-01-16 20:22:14 -0500 | [diff] [blame] | 539 | if (buf) { |
Serge Martin | c0f03f6 | 2020-05-09 08:11:16 +0200 | [diff] [blame] | 540 | resource &r = buf->resource_in(*ctx.q); |
Tom Stellard | 945d87f | 2014-02-13 14:46:25 -0800 | [diff] [blame] | 541 | auto v = bytes(ctx.resources.size() << 24 | r.offset[0]); |
Jan Vesely | 6ec2109 | 2014-01-16 20:22:14 -0500 | [diff] [blame] | 542 | |
| 543 | extend(v, module::argument::zero_ext, marg.target_size); |
Francisco Jerez | c4578d2 | 2014-02-18 15:07:11 +0100 | [diff] [blame] | 544 | byteswap(v, ctx.q->device().endianness()); |
Jan Vesely | 6ec2109 | 2014-01-16 20:22:14 -0500 | [diff] [blame] | 545 | insert(ctx.input, v); |
| 546 | |
Tom Stellard | 945d87f | 2014-02-13 14:46:25 -0800 | [diff] [blame] | 547 | st = r.bind_surface(*ctx.q, false); |
Jan Vesely | 6ec2109 | 2014-01-16 20:22:14 -0500 | [diff] [blame] | 548 | ctx.resources.push_back(st); |
| 549 | } else { |
| 550 | // Null pointer. |
| 551 | allocate(ctx.input, marg.target_size); |
| 552 | } |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 553 | } |
| 554 | |
| 555 | void |
Francisco Jerez | 35307f5 | 2013-09-17 23:20:11 -0700 | [diff] [blame] | 556 | kernel::constant_argument::unbind(exec_context &ctx) { |
Francisco Jerez | 198cd13 | 2014-02-16 19:35:11 +0100 | [diff] [blame] | 557 | if (buf) |
Serge Martin | c0f03f6 | 2020-05-09 08:11:16 +0200 | [diff] [blame] | 558 | buf->resource_in(*ctx.q).unbind_surface(*ctx.q, st); |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 559 | } |
| 560 | |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 561 | void |
Francisco Jerez | 35307f5 | 2013-09-17 23:20:11 -0700 | [diff] [blame] | 562 | kernel::image_rd_argument::set(size_t size, const void *value) { |
Zoltan Gilian | be3622d | 2015-07-30 23:35:09 +0200 | [diff] [blame] | 563 | if (!value) |
| 564 | throw error(CL_INVALID_ARG_VALUE); |
| 565 | |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 566 | if (size != sizeof(cl_mem)) |
| 567 | throw error(CL_INVALID_ARG_SIZE); |
| 568 | |
Francisco Jerez | d6f7afc | 2013-10-01 12:00:51 -0700 | [diff] [blame] | 569 | img = &obj<image>(*(cl_mem *)value); |
Francisco Jerez | 8e14b82 | 2013-09-17 23:13:48 -0700 | [diff] [blame] | 570 | _set = true; |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 571 | } |
| 572 | |
| 573 | void |
Francisco Jerez | 35307f5 | 2013-09-17 23:20:11 -0700 | [diff] [blame] | 574 | kernel::image_rd_argument::bind(exec_context &ctx, |
| 575 | const module::argument &marg) { |
Francisco Jerez | 829caf4 | 2013-07-21 01:06:13 +0200 | [diff] [blame] | 576 | auto v = bytes(ctx.sviews.size()); |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 577 | |
Francisco Jerez | f64c0ca | 2013-07-22 23:08:46 +0200 | [diff] [blame] | 578 | extend(v, module::argument::zero_ext, marg.target_size); |
Francisco Jerez | c4578d2 | 2014-02-18 15:07:11 +0100 | [diff] [blame] | 579 | byteswap(v, ctx.q->device().endianness()); |
Francisco Jerez | df53082 | 2013-07-21 01:14:54 +0200 | [diff] [blame] | 580 | align(ctx.input, marg.target_align); |
Francisco Jerez | 829caf4 | 2013-07-21 01:06:13 +0200 | [diff] [blame] | 581 | insert(ctx.input, v); |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 582 | |
Serge Martin | c0f03f6 | 2020-05-09 08:11:16 +0200 | [diff] [blame] | 583 | st = img->resource_in(*ctx.q).bind_sampler_view(*ctx.q); |
Francisco Jerez | 829caf4 | 2013-07-21 01:06:13 +0200 | [diff] [blame] | 584 | ctx.sviews.push_back(st); |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 585 | } |
| 586 | |
| 587 | void |
Francisco Jerez | 35307f5 | 2013-09-17 23:20:11 -0700 | [diff] [blame] | 588 | kernel::image_rd_argument::unbind(exec_context &ctx) { |
Serge Martin | c0f03f6 | 2020-05-09 08:11:16 +0200 | [diff] [blame] | 589 | img->resource_in(*ctx.q).unbind_sampler_view(*ctx.q, st); |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 590 | } |
| 591 | |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 592 | void |
Francisco Jerez | 35307f5 | 2013-09-17 23:20:11 -0700 | [diff] [blame] | 593 | kernel::image_wr_argument::set(size_t size, const void *value) { |
Zoltan Gilian | be3622d | 2015-07-30 23:35:09 +0200 | [diff] [blame] | 594 | if (!value) |
| 595 | throw error(CL_INVALID_ARG_VALUE); |
| 596 | |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 597 | if (size != sizeof(cl_mem)) |
| 598 | throw error(CL_INVALID_ARG_SIZE); |
| 599 | |
Francisco Jerez | d6f7afc | 2013-10-01 12:00:51 -0700 | [diff] [blame] | 600 | img = &obj<image>(*(cl_mem *)value); |
Francisco Jerez | 8e14b82 | 2013-09-17 23:13:48 -0700 | [diff] [blame] | 601 | _set = true; |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 602 | } |
| 603 | |
| 604 | void |
Francisco Jerez | 35307f5 | 2013-09-17 23:20:11 -0700 | [diff] [blame] | 605 | kernel::image_wr_argument::bind(exec_context &ctx, |
| 606 | const module::argument &marg) { |
Karol Herbst | 3aead71 | 2020-10-07 23:08:43 +0200 | [diff] [blame] | 607 | auto v = bytes(ctx.iviews.size()); |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 608 | |
Francisco Jerez | f64c0ca | 2013-07-22 23:08:46 +0200 | [diff] [blame] | 609 | extend(v, module::argument::zero_ext, marg.target_size); |
Francisco Jerez | c4578d2 | 2014-02-18 15:07:11 +0100 | [diff] [blame] | 610 | byteswap(v, ctx.q->device().endianness()); |
Francisco Jerez | df53082 | 2013-07-21 01:14:54 +0200 | [diff] [blame] | 611 | align(ctx.input, marg.target_align); |
Francisco Jerez | 829caf4 | 2013-07-21 01:06:13 +0200 | [diff] [blame] | 612 | insert(ctx.input, v); |
Karol Herbst | 3aead71 | 2020-10-07 23:08:43 +0200 | [diff] [blame] | 613 | ctx.iviews.push_back(img->resource_in(*ctx.q).create_image_view(*ctx.q)); |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 614 | } |
| 615 | |
| 616 | void |
Francisco Jerez | 35307f5 | 2013-09-17 23:20:11 -0700 | [diff] [blame] | 617 | kernel::image_wr_argument::unbind(exec_context &ctx) { |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 618 | } |
| 619 | |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 620 | void |
Francisco Jerez | 35307f5 | 2013-09-17 23:20:11 -0700 | [diff] [blame] | 621 | kernel::sampler_argument::set(size_t size, const void *value) { |
Zoltan Gilian | be3622d | 2015-07-30 23:35:09 +0200 | [diff] [blame] | 622 | if (!value) |
| 623 | throw error(CL_INVALID_SAMPLER); |
| 624 | |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 625 | if (size != sizeof(cl_sampler)) |
| 626 | throw error(CL_INVALID_ARG_SIZE); |
| 627 | |
Francisco Jerez | 04d0ab9 | 2013-09-15 22:20:43 -0700 | [diff] [blame] | 628 | s = &obj(*(cl_sampler *)value); |
Francisco Jerez | 8e14b82 | 2013-09-17 23:13:48 -0700 | [diff] [blame] | 629 | _set = true; |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 630 | } |
| 631 | |
| 632 | void |
Francisco Jerez | 35307f5 | 2013-09-17 23:20:11 -0700 | [diff] [blame] | 633 | kernel::sampler_argument::bind(exec_context &ctx, |
| 634 | const module::argument &marg) { |
| 635 | st = s->bind(*ctx.q); |
Francisco Jerez | 829caf4 | 2013-07-21 01:06:13 +0200 | [diff] [blame] | 636 | ctx.samplers.push_back(st); |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 637 | } |
| 638 | |
| 639 | void |
Francisco Jerez | 35307f5 | 2013-09-17 23:20:11 -0700 | [diff] [blame] | 640 | kernel::sampler_argument::unbind(exec_context &ctx) { |
| 641 | s->unbind(*ctx.q, st); |
Francisco Jerez | c6db1b3 | 2012-04-20 16:56:19 +0200 | [diff] [blame] | 642 | } |