blob: 6f46222dbdcef97786260c4ec65d7f9431ffecb9 [file] [log] [blame]
//
// Copyright 2012 Francisco Jerez
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//
22
Serge Martindad042b2020-05-03 13:56:15 +020023#include <algorithm>
Aaron Watry5e253fe2017-08-16 20:44:41 -050024#include <unistd.h>
Francisco Jerezc6db1b32012-04-20 16:56:19 +020025#include "core/device.hpp"
Francisco Jerezc4578d22014-02-18 15:07:11 +010026#include "core/platform.hpp"
Francisco Jerezc6db1b32012-04-20 16:56:19 +020027#include "pipe/p_screen.h"
28#include "pipe/p_state.h"
Eric Anholt0c31fe92019-04-29 15:38:24 -070029#include "util/bitscan.h"
Aaron Watry95ae6c02017-08-09 22:02:30 -050030#include "util/u_debug.h"
Dave Airlief33b4172019-04-10 10:24:46 +100031#include "spirv/invocation.hpp"
32#include "nir/invocation.hpp"
33#include <fstream>
Francisco Jerezc6db1b32012-04-20 16:56:19 +020034
35using namespace clover;
36
37namespace {
38 template<typename T>
39 std::vector<T>
Bas Nieuwenhuizen1a5c8c22016-03-25 02:06:50 +010040 get_compute_param(pipe_screen *pipe, pipe_shader_ir ir_format,
41 pipe_compute_cap cap) {
42 int sz = pipe->get_compute_param(pipe, ir_format, cap, NULL);
Francisco Jerezc6db1b32012-04-20 16:56:19 +020043 std::vector<T> v(sz / sizeof(T));
44
Bas Nieuwenhuizen1a5c8c22016-03-25 02:06:50 +010045 pipe->get_compute_param(pipe, ir_format, cap, &v.front());
Francisco Jerezc6db1b32012-04-20 16:56:19 +020046 return v;
47 }
48}
49
Francisco Jerezc9e009b2013-09-15 20:06:57 -070050device::device(clover::platform &platform, pipe_loader_device *ldev) :
Dave Airlief33b4172019-04-10 10:24:46 +100051 platform(platform), clc_cache(NULL), ldev(ldev) {
Nicolai Hähnleae7283d2017-08-03 15:02:09 +020052 pipe = pipe_loader_create_screen(ldev);
Karol Herbstdeb04ad2019-08-06 20:35:48 +020053 if (pipe && pipe->get_param(pipe, PIPE_CAP_COMPUTE)) {
54 if (supports_ir(PIPE_SHADER_IR_NATIVE))
55 return;
56#ifdef HAVE_CLOVER_SPIRV
Dave Airlief33b4172019-04-10 10:24:46 +100057 if (supports_ir(PIPE_SHADER_IR_NIR_SERIALIZED)) {
Dave Airlie43390a52020-10-07 09:01:43 +100058 nir::check_for_libclc(*this);
Dave Airlief33b4172019-04-10 10:24:46 +100059 clc_cache = nir::create_clc_disk_cache();
Dave Airlie43390a52020-10-07 09:01:43 +100060 clc_nir = lazy<std::shared_ptr<nir_shader>>([&] () { std::string log; return std::shared_ptr<nir_shader>(nir::load_libclc_nir(*this, log), ralloc_free); });
Karol Herbstdeb04ad2019-08-06 20:35:48 +020061 return;
Dave Airlief33b4172019-04-10 10:24:46 +100062 }
Karol Herbstdeb04ad2019-08-06 20:35:48 +020063#endif
Tom Stellardc5f0c982014-05-08 21:08:32 -040064 }
Karol Herbstdeb04ad2019-08-06 20:35:48 +020065 if (pipe)
66 pipe->destroy(pipe);
67 throw error(CL_INVALID_DEVICE);
Francisco Jerezc6db1b32012-04-20 16:56:19 +020068}
69
Francisco Jerezc9e009b2013-09-15 20:06:57 -070070device::~device() {
Dave Airlief33b4172019-04-10 10:24:46 +100071 if (clc_cache)
72 disk_cache_destroy(clc_cache);
Francisco Jerezc6db1b32012-04-20 16:56:19 +020073 if (pipe)
74 pipe->destroy(pipe);
75 if (ldev)
76 pipe_loader_release(&ldev, 1);
77}
78
Francisco Jerez369419f2013-09-16 21:11:16 -070079bool
80device::operator==(const device &dev) const {
81 return this == &dev;
82}
83
Francisco Jerezc6db1b32012-04-20 16:56:19 +020084cl_device_type
Francisco Jerezc9e009b2013-09-15 20:06:57 -070085device::type() const {
Francisco Jerezc6db1b32012-04-20 16:56:19 +020086 switch (ldev->type) {
87 case PIPE_LOADER_DEVICE_SOFTWARE:
88 return CL_DEVICE_TYPE_CPU;
89 case PIPE_LOADER_DEVICE_PCI:
Emil Velikov26458422014-01-11 05:19:36 +000090 case PIPE_LOADER_DEVICE_PLATFORM:
Francisco Jerezc6db1b32012-04-20 16:56:19 +020091 return CL_DEVICE_TYPE_GPU;
92 default:
Francisco Jerez27c51b52014-10-08 17:29:14 +030093 unreachable("Unknown device type.");
Francisco Jerezc6db1b32012-04-20 16:56:19 +020094 }
95}
96
97cl_uint
Francisco Jerezc9e009b2013-09-15 20:06:57 -070098device::vendor_id() const {
Francisco Jerezc6db1b32012-04-20 16:56:19 +020099 switch (ldev->type) {
100 case PIPE_LOADER_DEVICE_SOFTWARE:
Emil Velikov26458422014-01-11 05:19:36 +0000101 case PIPE_LOADER_DEVICE_PLATFORM:
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200102 return 0;
103 case PIPE_LOADER_DEVICE_PCI:
Francisco Jerez03e3bc42012-05-16 15:43:29 +0200104 return ldev->u.pci.vendor_id;
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200105 default:
Francisco Jerez27c51b52014-10-08 17:29:14 +0300106 unreachable("Unknown device type.");
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200107 }
108}
109
110size_t
Francisco Jerezc9e009b2013-09-15 20:06:57 -0700111device::max_images_read() const {
Karol Herbstf6c46e82020-10-15 16:31:10 +0200112 return PIPE_MAX_SHADER_SAMPLER_VIEWS;
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200113}
114
115size_t
Francisco Jerezc9e009b2013-09-15 20:06:57 -0700116device::max_images_write() const {
Marek Olšákb73bec02015-07-05 14:34:13 +0200117 return PIPE_MAX_SHADER_IMAGES;
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200118}
119
Serge Martin05fcc732016-10-01 18:51:11 +0200120size_t
121device::max_image_buffer_size() const {
122 return pipe->get_param(pipe, PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE);
123}
124
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200125cl_uint
Francisco Jerezc9e009b2013-09-15 20:06:57 -0700126device::max_image_levels_2d() const {
Eric Anholt0c31fe92019-04-29 15:38:24 -0700127 return util_last_bit(pipe->get_param(pipe, PIPE_CAP_MAX_TEXTURE_2D_SIZE));
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200128}
129
130cl_uint
Francisco Jerezc9e009b2013-09-15 20:06:57 -0700131device::max_image_levels_3d() const {
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200132 return pipe->get_param(pipe, PIPE_CAP_MAX_TEXTURE_3D_LEVELS);
133}
134
Serge Martin05fcc732016-10-01 18:51:11 +0200135size_t
136device::max_image_array_number() const {
137 return pipe->get_param(pipe, PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS);
138}
139
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200140cl_uint
Francisco Jerezc9e009b2013-09-15 20:06:57 -0700141device::max_samplers() const {
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200142 return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE,
143 PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS);
144}
145
146cl_ulong
Francisco Jerezc9e009b2013-09-15 20:06:57 -0700147device::max_mem_global() const {
Bas Nieuwenhuizen1a5c8c22016-03-25 02:06:50 +0100148 return get_compute_param<uint64_t>(pipe, ir_format(),
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200149 PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE)[0];
150}
151
152cl_ulong
Francisco Jerezc9e009b2013-09-15 20:06:57 -0700153device::max_mem_local() const {
Bas Nieuwenhuizen1a5c8c22016-03-25 02:06:50 +0100154 return get_compute_param<uint64_t>(pipe, ir_format(),
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200155 PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE)[0];
156}
157
158cl_ulong
Francisco Jerezc9e009b2013-09-15 20:06:57 -0700159device::max_mem_input() const {
Bas Nieuwenhuizen1a5c8c22016-03-25 02:06:50 +0100160 return get_compute_param<uint64_t>(pipe, ir_format(),
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200161 PIPE_COMPUTE_CAP_MAX_INPUT_SIZE)[0];
162}
163
164cl_ulong
Francisco Jerezc9e009b2013-09-15 20:06:57 -0700165device::max_const_buffer_size() const {
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200166 return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE,
Marek Olšák04f2c882014-07-24 20:32:08 +0200167 PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE);
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200168}
169
170cl_uint
Francisco Jerezc9e009b2013-09-15 20:06:57 -0700171device::max_const_buffers() const {
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200172 return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE,
173 PIPE_SHADER_CAP_MAX_CONST_BUFFERS);
174}
175
Christoph Bumiller5c9bccc2012-05-12 19:32:46 +0200176size_t
Francisco Jerezc9e009b2013-09-15 20:06:57 -0700177device::max_threads_per_block() const {
Christoph Bumiller5c9bccc2012-05-12 19:32:46 +0200178 return get_compute_param<uint64_t>(
Bas Nieuwenhuizen1a5c8c22016-03-25 02:06:50 +0100179 pipe, ir_format(), PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK)[0];
Christoph Bumiller5c9bccc2012-05-12 19:32:46 +0200180}
181
Tom Stellard71682cf2012-09-17 14:29:49 +0000182cl_ulong
Francisco Jerezc9e009b2013-09-15 20:06:57 -0700183device::max_mem_alloc_size() const {
Bas Nieuwenhuizen1a5c8c22016-03-25 02:06:50 +0100184 return get_compute_param<uint64_t>(pipe, ir_format(),
Tom Stellard71682cf2012-09-17 14:29:49 +0000185 PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE)[0];
186}
187
Tom Stellardca848e82014-04-18 16:28:41 +0200188cl_uint
189device::max_clock_frequency() const {
Bas Nieuwenhuizen1a5c8c22016-03-25 02:06:50 +0100190 return get_compute_param<uint32_t>(pipe, ir_format(),
Tom Stellardca848e82014-04-18 16:28:41 +0200191 PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY)[0];
192}
193
Bruno Jiménez2a0dffa2014-05-30 17:31:12 +0200194cl_uint
195device::max_compute_units() const {
Bas Nieuwenhuizen1a5c8c22016-03-25 02:06:50 +0100196 return get_compute_param<uint32_t>(pipe, ir_format(),
Bruno Jiménez2a0dffa2014-05-30 17:31:12 +0200197 PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS)[0];
198}
199
Tom Stellard0ec85872014-07-23 20:37:08 -0400200bool
201device::image_support() const {
Bas Nieuwenhuizen1a5c8c22016-03-25 02:06:50 +0100202 return get_compute_param<uint32_t>(pipe, ir_format(),
Tom Stellard0ec85872014-07-23 20:37:08 -0400203 PIPE_COMPUTE_CAP_IMAGES_SUPPORTED)[0];
204}
205
Tom Stellardc97e9022014-07-02 15:42:43 -0400206bool
207device::has_doubles() const {
Nicolai Hähnlea020cb32017-01-27 10:35:13 +0100208 return pipe->get_param(pipe, PIPE_CAP_DOUBLES);
Tom Stellardc97e9022014-07-02 15:42:43 -0400209}
210
Aaron Watryd364ab42017-06-02 21:51:43 -0500211bool
Jan Veselyfdf0f1d2017-09-01 17:48:39 -0400212device::has_halves() const {
213 return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE,
214 PIPE_SHADER_CAP_FP16);
215}
216
217bool
Jan Veselyf67ceef2017-09-20 16:06:10 -0400218device::has_int64_atomics() const {
219 return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE,
220 PIPE_SHADER_CAP_INT64_ATOMICS);
221}
222
223bool
Aaron Watryd364ab42017-06-02 21:51:43 -0500224device::has_unified_memory() const {
225 return pipe->get_param(pipe, PIPE_CAP_UMA);
226}
227
Serge Martindad042b2020-05-03 13:56:15 +0200228size_t
Aaron Watry5e253fe2017-08-16 20:44:41 -0500229device::mem_base_addr_align() const {
Serge Martindad042b2020-05-03 13:56:15 +0200230 return std::max((size_t)sysconf(_SC_PAGESIZE), sizeof(cl_long) * 16);
Aaron Watry5e253fe2017-08-16 20:44:41 -0500231}
232
Karol Herbst035e8822018-05-21 12:19:42 +0200233cl_device_svm_capabilities
234device::svm_support() const {
235 // Without CAP_RESOURCE_FROM_USER_MEMORY SVM and CL_MEM_USE_HOST_PTR
236 // interactions won't work according to spec as clover manages a GPU side
237 // copy of the host data.
238 //
239 // The biggest problem are memory buffers created with CL_MEM_USE_HOST_PTR,
240 // but the application and/or the kernel updates the memory via SVM and not
241 // the cl_mem buffer.
242 // We can't even do proper tracking on what memory might have been accessed
243 // as the host ptr to the buffer could be within a SVM region, where through
244 // the CL API there is no reliable way of knowing if a certain cl_mem buffer
245 // was accessed by a kernel or not and the runtime can't reliably know from
246 // which side the GPU buffer content needs to be updated.
247 //
248 // Another unsolvable scenario is a cl_mem object passed by cl_mem reference
249 // and SVM pointer into the same kernel at the same time.
Karol Herbstc0f7f832020-05-05 15:09:50 +0200250 if (allows_user_pointers() && pipe->get_param(pipe, PIPE_CAP_SYSTEM_SVM))
Karol Herbsta2186582019-05-22 22:34:09 +0200251 // we can emulate all lower levels if we support fine grain system
252 return CL_DEVICE_SVM_FINE_GRAIN_SYSTEM |
253 CL_DEVICE_SVM_COARSE_GRAIN_BUFFER |
254 CL_DEVICE_SVM_FINE_GRAIN_BUFFER;
Karol Herbst035e8822018-05-21 12:19:42 +0200255 return 0;
256}
257
Karol Herbstc0f7f832020-05-05 15:09:50 +0200258bool
259device::allows_user_pointers() const {
260 return pipe->get_param(pipe, PIPE_CAP_RESOURCE_FROM_USER_MEMORY) ||
261 pipe->get_param(pipe, PIPE_CAP_RESOURCE_FROM_USER_MEMORY_COMPUTE_ONLY);
262}
263
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200264std::vector<size_t>
Francisco Jerezc9e009b2013-09-15 20:06:57 -0700265device::max_block_size() const {
Bas Nieuwenhuizen1a5c8c22016-03-25 02:06:50 +0100266 auto v = get_compute_param<uint64_t>(pipe, ir_format(),
267 PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
Francisco Jerezb70736f2012-05-12 19:33:33 +0200268 return { v.begin(), v.end() };
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200269}
270
Grigori Goronzyd15b32e2015-05-28 13:01:51 +0200271cl_uint
272device::subgroup_size() const {
Bas Nieuwenhuizen1a5c8c22016-03-25 02:06:50 +0100273 return get_compute_param<uint32_t>(pipe, ir_format(),
274 PIPE_COMPUTE_CAP_SUBGROUP_SIZE)[0];
Grigori Goronzyd15b32e2015-05-28 13:01:51 +0200275}
276
Jan Vesely083746b2016-08-28 04:08:15 -0400277cl_uint
278device::address_bits() const {
279 return get_compute_param<uint32_t>(pipe, ir_format(),
280 PIPE_COMPUTE_CAP_ADDRESS_BITS)[0];
281}
282
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200283std::string
Francisco Jerezc9e009b2013-09-15 20:06:57 -0700284device::device_name() const {
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200285 return pipe->get_name(pipe);
286}
287
288std::string
Francisco Jerezc9e009b2013-09-15 20:06:57 -0700289device::vendor_name() const {
Giuseppe Bilotta7932b302015-03-22 07:21:02 +0100290 return pipe->get_device_vendor(pipe);
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200291}
292
Tom Stellard613323b2012-04-23 12:09:08 -0400293enum pipe_shader_ir
Francisco Jerezc9e009b2013-09-15 20:06:57 -0700294device::ir_format() const {
Karol Herbstdeb04ad2019-08-06 20:35:48 +0200295 if (supports_ir(PIPE_SHADER_IR_NATIVE))
296 return PIPE_SHADER_IR_NATIVE;
297
298 assert(supports_ir(PIPE_SHADER_IR_NIR_SERIALIZED));
299 return PIPE_SHADER_IR_NIR_SERIALIZED;
Tom Stellard613323b2012-04-23 12:09:08 -0400300}
301
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200302std::string
Francisco Jerezc9e009b2013-09-15 20:06:57 -0700303device::ir_target() const {
304 std::vector<char> target = get_compute_param<char>(
Bas Nieuwenhuizen1a5c8c22016-03-25 02:06:50 +0100305 pipe, ir_format(), PIPE_COMPUTE_CAP_IR_TARGET);
Tom Stellard613323b2012-04-23 12:09:08 -0400306 return { target.data() };
Francisco Jerezc6db1b32012-04-20 16:56:19 +0200307}
Tom Stellard8c9d3c62013-07-09 21:21:40 -0700308
309enum pipe_endian
Francisco Jerezc9e009b2013-09-15 20:06:57 -0700310device::endianness() const {
Tom Stellard8c9d3c62013-07-09 21:21:40 -0700311 return (enum pipe_endian)pipe->get_param(pipe, PIPE_CAP_ENDIANNESS);
312}
Aaron Watry293b3e02017-07-21 21:17:50 -0500313
314std::string
315device::device_version() const {
Aaron Watry95ae6c02017-08-09 22:02:30 -0500316 static const std::string device_version =
317 debug_get_option("CLOVER_DEVICE_VERSION_OVERRIDE", "1.1");
318 return device_version;
Aaron Watry293b3e02017-07-21 21:17:50 -0500319}
320
321std::string
322device::device_clc_version() const {
Aaron Watry95ae6c02017-08-09 22:02:30 -0500323 static const std::string device_clc_version =
324 debug_get_option("CLOVER_DEVICE_CLC_VERSION_OVERRIDE", "1.1");
325 return device_clc_version;
Aaron Watry293b3e02017-07-21 21:17:50 -0500326}
Pierre Moreau505ec3a2017-10-03 21:07:45 +0200327
328bool
329device::supports_ir(enum pipe_shader_ir ir) const {
330 return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE,
331 PIPE_SHADER_CAP_SUPPORTED_IRS) & (1 << ir);
332}
Pierre Moreaub0336202018-01-21 18:49:00 +0100333
334std::string
335device::supported_extensions() const {
336 return
337 "cl_khr_byte_addressable_store"
338 " cl_khr_global_int32_base_atomics"
339 " cl_khr_global_int32_extended_atomics"
340 " cl_khr_local_int32_base_atomics"
341 " cl_khr_local_int32_extended_atomics"
342 + std::string(has_int64_atomics() ? " cl_khr_int64_base_atomics" : "")
343 + std::string(has_int64_atomics() ? " cl_khr_int64_extended_atomics" : "")
344 + std::string(has_doubles() ? " cl_khr_fp64" : "")
Karol Herbst471fd412019-05-24 15:47:28 +0200345 + std::string(has_halves() ? " cl_khr_fp16" : "")
346 + std::string(svm_support() ? " cl_arm_shared_virtual_memory" : "");
Pierre Moreaub0336202018-01-21 18:49:00 +0100347}
Karol Herbstdeb04ad2019-08-06 20:35:48 +0200348
349const void *
350device::get_compiler_options(enum pipe_shader_ir ir) const {
351 return pipe->get_compiler_options(pipe, ir, PIPE_SHADER_COMPUTE);
352}