/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/amd-iommu.h>
#include <linux/bsearch.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_pm4_headers_vi.h"
#include "cwsr_trap_handler_gfx8.asm"

#define MQD_SIZE_ALIGNED 768

static const struct kfd_device_info kaveri_device_info = {
	.asic_family = CHIP_KAVERI,
	.max_pasid_bits = 16,
	/* max num of queues for KV.TODO should be a dynamic value */
	.max_no_of_hqd	= 24,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = false,
	.needs_pci_atomics = false,
};

static const struct kfd_device_info carrizo_device_info = {
	.asic_family = CHIP_CARRIZO,
	.max_pasid_bits = 16,
	/* max num of queues for CZ.TODO should be a dynamic value */
	.max_no_of_hqd	= 24,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_pci_atomics = false,
};

struct kfd_deviceid {
	unsigned short did;
	const struct kfd_device_info *device_info;
};

/* Please keep this sorted by increasing device id. */
static const struct kfd_deviceid supported_devices[] = {
	{ 0x1304, &kaveri_device_info },	/* Kaveri */
	{ 0x1305, &kaveri_device_info },	/* Kaveri */
	{ 0x1306, &kaveri_device_info },	/* Kaveri */
	{ 0x1307, &kaveri_device_info },	/* Kaveri */
	{ 0x1309, &kaveri_device_info },	/* Kaveri */
	{ 0x130A, &kaveri_device_info },	/* Kaveri */
	{ 0x130B, &kaveri_device_info },	/* Kaveri */
	{ 0x130C, &kaveri_device_info },	/* Kaveri */
	{ 0x130D, &kaveri_device_info },	/* Kaveri */
	{ 0x130E, &kaveri_device_info },	/* Kaveri */
	{ 0x130F, &kaveri_device_info },	/* Kaveri */
	{ 0x1310, &kaveri_device_info },	/* Kaveri */
	{ 0x1311, &kaveri_device_info },	/* Kaveri */
	{ 0x1312, &kaveri_device_info },	/* Kaveri */
	{ 0x1313, &kaveri_device_info },	/* Kaveri */
	{ 0x1315, &kaveri_device_info },	/* Kaveri */
	{ 0x1316, &kaveri_device_info },	/* Kaveri */
	{ 0x1317, &kaveri_device_info },	/* Kaveri */
	{ 0x1318, &kaveri_device_info },	/* Kaveri */
	{ 0x131B, &kaveri_device_info },	/* Kaveri */
	{ 0x131C, &kaveri_device_info },	/* Kaveri */
	{ 0x131D, &kaveri_device_info },	/* Kaveri */
	{ 0x9870, &carrizo_device_info },	/* Carrizo */
	{ 0x9874, &carrizo_device_info },	/* Carrizo */
	{ 0x9875, &carrizo_device_info },	/* Carrizo */
	{ 0x9876, &carrizo_device_info },	/* Carrizo */
	{ 0x9877, &carrizo_device_info }	/* Carrizo */
};

static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
				unsigned int chunk_size);
static void kfd_gtt_sa_fini(struct kfd_dev *kfd);

static int kfd_resume(struct kfd_dev *kfd);

static const struct kfd_device_info *lookup_device_info(unsigned short did)
{
	size_t i;

	for (i = 0; i < ARRAY_SIZE(supported_devices); i++) {
		if (supported_devices[i].did == did) {
			WARN_ON(!supported_devices[i].device_info);
			return supported_devices[i].device_info;
		}
	}

	dev_warn(kfd_device, "DID %04x is missing in supported_devices\n",
		 did);

	return NULL;
}

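/*
 * Called by the GPU driver (KGD) for each supported device. Checks the PCIe
 * atomics requirement where needed and allocates the kfd_dev that
 * kgd2kfd_device_init() will finish setting up.
 */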
struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
	struct pci_dev *pdev, const struct kfd2kgd_calls *f2g)
{
	struct kfd_dev *kfd;

	const struct kfd_device_info *device_info =
				lookup_device_info(pdev->device);

	if (!device_info) {
		dev_err(kfd_device, "kgd2kfd_probe failed\n");
		return NULL;
	}

	if (device_info->needs_pci_atomics) {
		/* Allow BIF to recode atomics to PCIe 3.0
		 * AtomicOps. 32 and 64-bit requests are possible and
		 * must be supported.
		 */
		if (pci_enable_atomic_ops_to_root(pdev,
				PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
				PCI_EXP_DEVCAP2_ATOMIC_COMP64) < 0) {
			dev_info(kfd_device,
				"skipped device %x:%x, PCI rejects atomics",
				 pdev->vendor, pdev->device);
			return NULL;
		}
	}

	kfd = kzalloc(sizeof(*kfd), GFP_KERNEL);
	if (!kfd)
		return NULL;

	kfd->kgd = kgd;
	kfd->device_info = device_info;
	kfd->pdev = pdev;
	kfd->init_complete = false;
	kfd->kfd2kgd = f2g;

	mutex_init(&kfd->doorbell_mutex);
	memset(&kfd->doorbell_available_index, 0,
		sizeof(kfd->doorbell_available_index));

	return kfd;
}

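/*
 * Query the AMD IOMMU for ATS, PRI and PASID support and clamp the global
 * PASID limit to what both KFD and the IOMMU can handle.
 */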
static bool device_iommu_pasid_init(struct kfd_dev *kfd)
{
	const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP |
					AMD_IOMMU_DEVICE_FLAG_PRI_SUP |
					AMD_IOMMU_DEVICE_FLAG_PASID_SUP;

	struct amd_iommu_device_info iommu_info;
	unsigned int pasid_limit;
	int err;

	err = amd_iommu_device_info(kfd->pdev, &iommu_info);
	if (err < 0) {
		dev_err(kfd_device,
			"error getting iommu info. is the iommu enabled?\n");
		return false;
	}

	if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) {
		dev_err(kfd_device, "error required iommu flags ats %i, pri %i, pasid %i\n",
		       (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0,
		       (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0,
		       (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP)
									!= 0);
		return false;
	}

	pasid_limit = min_t(unsigned int,
			(unsigned int)(1 << kfd->device_info->max_pasid_bits),
			iommu_info.max_pasids);

	if (!kfd_set_pasid_limit(pasid_limit)) {
		dev_err(kfd_device, "error setting pasid limit\n");
		return false;
	}

	return true;
}

static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid)
{
	struct kfd_dev *dev = kfd_device_by_pci_dev(pdev);

	if (dev)
		kfd_process_iommu_unbind_callback(dev, pasid);
}

/*
 * This function is called by the IOMMU driver on PPR failure
 */
static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid,
		unsigned long address, u16 flags)
{
	struct kfd_dev *dev;

	dev_warn(kfd_device,
			"Invalid PPR device %x:%x.%x pasid %d address 0x%lX flags 0x%X",
			PCI_BUS_NUM(pdev->devfn),
			PCI_SLOT(pdev->devfn),
			PCI_FUNC(pdev->devfn),
			pasid,
			address,
			flags);

	dev = kfd_device_by_pci_dev(pdev);
	if (!WARN_ON(!dev))
		kfd_signal_iommu_event(dev, pasid, address,
			flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC);

	return AMD_IOMMU_INV_PRI_RSP_INVALID;
}

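/*
 * If CWSR (compute wave save/restore) is enabled and supported by the ASIC,
 * point the device at the built-in gfx8 trap handler ISA.
 */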
static void kfd_cwsr_init(struct kfd_dev *kfd)
{
	if (cwsr_enable && kfd->device_info->supports_cwsr) {
		BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);

		kfd->cwsr_isa = cwsr_trap_gfx8_hex;
		kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
		kfd->cwsr_enabled = true;
	}
}

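/*
 * Second-stage device init: sizes and allocates the GTT area (MQDs, runlist
 * packets, HIQ/DIQ plus 512KB of slack), then brings up the doorbells,
 * topology, interrupts, queue manager, IOMMU and CWSR before resuming the
 * device. On failure, everything is unwound in reverse order.
 */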
bool kgd2kfd_device_init(struct kfd_dev *kfd,
			 const struct kgd2kfd_shared_resources *gpu_resources)
{
	unsigned int size;

	kfd->shared_resources = *gpu_resources;

	kfd->vm_info.first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1;
	kfd->vm_info.last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1;
	kfd->vm_info.vmid_num_kfd = kfd->vm_info.last_vmid_kfd
			- kfd->vm_info.first_vmid_kfd + 1;

	/* Verify module parameters regarding mapped process number */
	if ((hws_max_conc_proc < 0)
			|| (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) {
		dev_err(kfd_device,
			"hws_max_conc_proc %d must be between 0 and %d, use %d instead\n",
			hws_max_conc_proc, kfd->vm_info.vmid_num_kfd,
			kfd->vm_info.vmid_num_kfd);
		kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd;
	} else
		kfd->max_proc_per_quantum = hws_max_conc_proc;

	/* calculate max size of mqds needed for queues */
	size = max_num_of_queues_per_device *
			kfd->device_info->mqd_size_aligned;

	/*
	 * calculate max size of runlist packet.
	 * There can be only 2 packets at once
	 */
	size += (KFD_MAX_NUM_OF_PROCESSES * sizeof(struct pm4_mes_map_process) +
		max_num_of_queues_per_device * sizeof(struct pm4_mes_map_queues)
		+ sizeof(struct pm4_mes_runlist)) * 2;

	/* Add size of HIQ & DIQ */
	size += KFD_KERNEL_QUEUE_SIZE * 2;

	/* add another 512KB for all other allocations on gart (HPD, fences) */
	size += 512 * 1024;

	if (kfd->kfd2kgd->init_gtt_mem_allocation(
			kfd->kgd, size, &kfd->gtt_mem,
			&kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr)) {
		dev_err(kfd_device, "Could not allocate %d bytes\n", size);
		goto out;
	}

	dev_info(kfd_device, "Allocated %d bytes on gart\n", size);

	/* Initialize GTT sa with 512 byte chunk size */
	if (kfd_gtt_sa_init(kfd, size, 512) != 0) {
		dev_err(kfd_device, "Error initializing gtt sub-allocator\n");
		goto kfd_gtt_sa_init_error;
	}

	if (kfd_doorbell_init(kfd)) {
		dev_err(kfd_device,
			"Error initializing doorbell aperture\n");
		goto kfd_doorbell_error;
	}

	if (kfd_topology_add_device(kfd)) {
		dev_err(kfd_device, "Error adding device to topology\n");
		goto kfd_topology_add_device_error;
	}

	if (kfd_interrupt_init(kfd)) {
		dev_err(kfd_device, "Error initializing interrupts\n");
		goto kfd_interrupt_error;
	}

	kfd->dqm = device_queue_manager_init(kfd);
	if (!kfd->dqm) {
		dev_err(kfd_device, "Error initializing queue manager\n");
		goto device_queue_manager_error;
	}

	if (!device_iommu_pasid_init(kfd)) {
		dev_err(kfd_device,
			"Error initializing iommuv2 for device %x:%x\n",
			kfd->pdev->vendor, kfd->pdev->device);
		goto device_iommu_pasid_error;
	}

	kfd_cwsr_init(kfd);

	if (kfd_resume(kfd))
		goto kfd_resume_error;

	kfd->dbgmgr = NULL;

	kfd->init_complete = true;
	dev_info(kfd_device, "added device %x:%x\n", kfd->pdev->vendor,
		 kfd->pdev->device);

	pr_debug("Starting kfd with the following scheduling policy %d\n",
		sched_policy);

	goto out;

kfd_resume_error:
device_iommu_pasid_error:
	device_queue_manager_uninit(kfd->dqm);
device_queue_manager_error:
	kfd_interrupt_exit(kfd);
kfd_interrupt_error:
	kfd_topology_remove_device(kfd);
kfd_topology_add_device_error:
	kfd_doorbell_fini(kfd);
kfd_doorbell_error:
	kfd_gtt_sa_fini(kfd);
kfd_gtt_sa_init_error:
	kfd->kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem);
	dev_err(kfd_device,
		"device %x:%x NOT added due to errors\n",
		kfd->pdev->vendor, kfd->pdev->device);
out:
	return kfd->init_complete;
}

void kgd2kfd_device_exit(struct kfd_dev *kfd)
{
	if (kfd->init_complete) {
		kgd2kfd_suspend(kfd);
		device_queue_manager_uninit(kfd->dqm);
		kfd_interrupt_exit(kfd);
		kfd_topology_remove_device(kfd);
		kfd_doorbell_fini(kfd);
		kfd_gtt_sa_fini(kfd);
		kfd->kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem);
	}

	kfree(kfd);
}

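/*
 * Stop the queue manager, unbind processes and release the IOMMU device.
 * kfd_resume() reinitializes the IOMMU on the way back up.
 */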
void kgd2kfd_suspend(struct kfd_dev *kfd)
{
	if (!kfd->init_complete)
		return;

	kfd->dqm->ops.stop(kfd->dqm);

	kfd_unbind_processes_from_device(kfd);

	amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL);
	amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL);
	amd_iommu_free_device(kfd->pdev);
}

int kgd2kfd_resume(struct kfd_dev *kfd)
{
	if (!kfd->init_complete)
		return 0;

	return kfd_resume(kfd);
}

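/*
 * Reinitialize the IOMMU device, register the PASID shutdown and invalid-PPR
 * callbacks, rebind processes and restart the queue manager.
 */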
static int kfd_resume(struct kfd_dev *kfd)
{
	int err = 0;
	unsigned int pasid_limit = kfd_get_pasid_limit();

	err = amd_iommu_init_device(kfd->pdev, pasid_limit);
	if (err)
		return -ENXIO;
	amd_iommu_set_invalidate_ctx_cb(kfd->pdev,
					iommu_pasid_shutdown_callback);
	amd_iommu_set_invalid_ppr_cb(kfd->pdev,
				     iommu_invalid_ppr_cb);

	err = kfd_bind_processes_to_device(kfd);
	if (err)
		goto processes_bind_error;

	err = kfd->dqm->ops.start(kfd->dqm);
	if (err) {
		dev_err(kfd_device,
			"Error starting queue manager for device %x:%x\n",
			kfd->pdev->vendor, kfd->pdev->device);
		goto dqm_start_error;
	}

	return err;

dqm_start_error:
processes_bind_error:
	amd_iommu_free_device(kfd->pdev);

	return err;
}

/* This is called directly from KGD at ISR. */
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
{
	if (!kfd->init_complete)
		return;

	spin_lock(&kfd->interrupt_lock);

	if (kfd->interrupts_active
	    && interrupt_is_wanted(kfd, ih_ring_entry)
	    && enqueue_ih_ring_entry(kfd, ih_ring_entry))
		queue_work(kfd->ih_wq, &kfd->interrupt_work);

	spin_unlock(&kfd->interrupt_lock);
}

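/*
 * Simple bitmap-based sub-allocator for the GTT area: the buffer is split
 * into fixed-size chunks and each bit tracks whether a chunk is in use.
 */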
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
				unsigned int chunk_size)
{
	unsigned int num_of_longs;

	if (WARN_ON(buf_size < chunk_size))
		return -EINVAL;
	if (WARN_ON(buf_size == 0))
		return -EINVAL;
	if (WARN_ON(chunk_size == 0))
		return -EINVAL;

	kfd->gtt_sa_chunk_size = chunk_size;
	kfd->gtt_sa_num_of_chunks = buf_size / chunk_size;

	num_of_longs = (kfd->gtt_sa_num_of_chunks + BITS_PER_LONG - 1) /
		BITS_PER_LONG;

	kfd->gtt_sa_bitmap = kcalloc(num_of_longs, sizeof(long), GFP_KERNEL);

	if (!kfd->gtt_sa_bitmap)
		return -ENOMEM;

	pr_debug("gtt_sa_num_of_chunks = %d, gtt_sa_bitmap = %p\n",
			kfd->gtt_sa_num_of_chunks, kfd->gtt_sa_bitmap);

	mutex_init(&kfd->gtt_sa_lock);

	return 0;
}

static void kfd_gtt_sa_fini(struct kfd_dev *kfd)
{
	mutex_destroy(&kfd->gtt_sa_lock);
	kfree(kfd->gtt_sa_bitmap);
}

static inline uint64_t kfd_gtt_sa_calc_gpu_addr(uint64_t start_addr,
						unsigned int bit_num,
						unsigned int chunk_size)
{
	return start_addr + bit_num * chunk_size;
}

static inline uint32_t *kfd_gtt_sa_calc_cpu_addr(void *start_addr,
						unsigned int bit_num,
						unsigned int chunk_size)
{
	return (uint32_t *) ((uint64_t) start_addr + bit_num * chunk_size);
}

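/*
 * First-fit allocation of contiguous chunks. If a run of free chunks turns
 * out to be too short, the search restarts from the chunk that broke the run.
 */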
int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
			struct kfd_mem_obj **mem_obj)
{
	unsigned int found, start_search, cur_size;

	if (size == 0)
		return -EINVAL;

	if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size)
		return -ENOMEM;

	*mem_obj = kmalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
	if ((*mem_obj) == NULL)
		return -ENOMEM;

	pr_debug("Allocated mem_obj = %p for size = %d\n", *mem_obj, size);

	start_search = 0;

	mutex_lock(&kfd->gtt_sa_lock);

kfd_gtt_restart_search:
	/* Find the first chunk that is free */
	found = find_next_zero_bit(kfd->gtt_sa_bitmap,
					kfd->gtt_sa_num_of_chunks,
					start_search);

	pr_debug("Found = %d\n", found);

	/* If there wasn't any free chunk, bail out */
	if (found == kfd->gtt_sa_num_of_chunks)
		goto kfd_gtt_no_free_chunk;

	/* Update fields of mem_obj */
	(*mem_obj)->range_start = found;
	(*mem_obj)->range_end = found;
	(*mem_obj)->gpu_addr = kfd_gtt_sa_calc_gpu_addr(
					kfd->gtt_start_gpu_addr,
					found,
					kfd->gtt_sa_chunk_size);
	(*mem_obj)->cpu_ptr = kfd_gtt_sa_calc_cpu_addr(
					kfd->gtt_start_cpu_ptr,
					found,
					kfd->gtt_sa_chunk_size);

	pr_debug("gpu_addr = %p, cpu_addr = %p\n",
			(uint64_t *) (*mem_obj)->gpu_addr, (*mem_obj)->cpu_ptr);

	/* If we need only one chunk, mark it as allocated and get out */
	if (size <= kfd->gtt_sa_chunk_size) {
		pr_debug("Single bit\n");
		set_bit(found, kfd->gtt_sa_bitmap);
		goto kfd_gtt_out;
	}

	/* Otherwise, try to see if we have enough contiguous chunks */
	cur_size = size - kfd->gtt_sa_chunk_size;
	do {
		(*mem_obj)->range_end =
			find_next_zero_bit(kfd->gtt_sa_bitmap,
					kfd->gtt_sa_num_of_chunks, ++found);
		/*
		 * If the next free chunk is not contiguous then we need to
		 * restart our search from the last free chunk we found (which
		 * wasn't contiguous to the previous ones)
		 */
		if ((*mem_obj)->range_end != found) {
			start_search = found;
			goto kfd_gtt_restart_search;
		}

		/*
		 * If we reached end of buffer, bail out with error
		 */
		if (found == kfd->gtt_sa_num_of_chunks)
			goto kfd_gtt_no_free_chunk;

		/* Check if we don't need another chunk */
		if (cur_size <= kfd->gtt_sa_chunk_size)
			cur_size = 0;
		else
			cur_size -= kfd->gtt_sa_chunk_size;

	} while (cur_size > 0);

	pr_debug("range_start = %d, range_end = %d\n",
		(*mem_obj)->range_start, (*mem_obj)->range_end);

	/* Mark the chunks as allocated */
	for (found = (*mem_obj)->range_start;
		found <= (*mem_obj)->range_end;
		found++)
		set_bit(found, kfd->gtt_sa_bitmap);

kfd_gtt_out:
	mutex_unlock(&kfd->gtt_sa_lock);
	return 0;

kfd_gtt_no_free_chunk:
	pr_debug("Allocation failed with mem_obj = %p\n", *mem_obj);
	mutex_unlock(&kfd->gtt_sa_lock);
	kfree(*mem_obj);
	return -ENOMEM;
}

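/* Release a sub-allocation by clearing its chunk range in the bitmap */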
int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj)
{
	unsigned int bit;

	/* Act like kfree when trying to free a NULL object */
	if (!mem_obj)
		return 0;

	pr_debug("Free mem_obj = %p, range_start = %d, range_end = %d\n",
			mem_obj, mem_obj->range_start, mem_obj->range_end);

	mutex_lock(&kfd->gtt_sa_lock);

	/* Mark the chunks as free */
	for (bit = mem_obj->range_start;
		bit <= mem_obj->range_end;
		bit++)
		clear_bit(bit, kfd->gtt_sa_bitmap);

	mutex_unlock(&kfd->gtt_sa_lock);

	kfree(mem_obj);
	return 0;
}