blob: ee33b86e493d8e3ffeca2f000a40b0ae713617b4 [file] [log] [blame]
Yair Shacharfbeb6612015-05-20 13:48:26 +03001/*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23
24#include <linux/types.h>
25#include <linux/kernel.h>
26#include <linux/log2.h>
27#include <linux/sched.h>
28#include <linux/slab.h>
29#include <linux/mutex.h>
30#include <linux/device.h>
31
32#include "kfd_pm4_headers.h"
33#include "kfd_pm4_headers_diq.h"
34#include "kfd_kernel_queue.h"
35#include "kfd_priv.h"
36#include "kfd_pm4_opcodes.h"
37#include "cik_regs.h"
38#include "kfd_dbgmgr.h"
39#include "kfd_dbgdev.h"
40#include "kfd_device_queue_manager.h"
41#include "../../radeon/cik_reg.h"
42
/*
 * Disable address-watch directly through the kfd2kgd interface.
 * Used on the no-DIQ path, where register access does not go through
 * a debug interface queue.
 */
static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev)
{
	BUG_ON(!dev || !dev->kfd2kgd);

	dev->kfd2kgd->address_watch_disable(dev->kgd);
}
49
/*
 * Submit an indirect buffer (IB) to the Debug Interface Queue and wait
 * for the CP to consume it.
 *
 * The caller's packet buffer (GPU VA @vmid0_address, @size_in_bytes long)
 * is referenced from an INDIRECT_BUFFER_PASID packet placed on the DIQ,
 * followed by a RELEASE_MEM packet that makes the CP write a sync value
 * into a GART-allocated variable once the IB has executed.  We then
 * fence-wait (with timeout) on that variable for CPU/GPU sync.
 *
 * Returns 0 on success, negative errno on allocation/queue failure or
 * fence timeout.
 */
static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
			unsigned int pasid, uint64_t vmid0_address,
			uint32_t *packet_buff, size_t size_in_bytes)
{
	struct pm4__release_mem *rm_packet;
	struct pm4__indirect_buffer_pasid *ib_packet;
	struct kfd_mem_obj *mem_obj;
	size_t pq_packets_size_in_bytes;
	union ULARGE_INTEGER *largep;
	union ULARGE_INTEGER addr;
	struct kernel_queue *kq;
	uint64_t *rm_state;	/* CPU-visible sync variable written by CP */
	unsigned int *ib_packet_buff;
	int status;

	BUG_ON(!dbgdev || !dbgdev->kq || !packet_buff || !size_in_bytes);

	kq = dbgdev->kq;

	/* Room on the DIQ for both the IB reference and the sync packet */
	pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) +
				sizeof(struct pm4__indirect_buffer_pasid);

	/*
	 * We acquire a buffer from DIQ
	 * The receive packet buff will be sitting on the Indirect Buffer
	 * and in the PQ we put the IB packet + sync packet(s).
	 */
	status = kq->ops.acquire_packet_buffer(kq,
				pq_packets_size_in_bytes / sizeof(uint32_t),
				&ib_packet_buff);
	if (status != 0) {
		pr_err("amdkfd: acquire_packet_buffer failed\n");
		return status;
	}

	memset(ib_packet_buff, 0, pq_packets_size_in_bytes);

	ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff);

	ib_packet->header.count = 3;
	ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID;
	ib_packet->header.type = PM4_TYPE_3;

	/* Split the 64-bit GPU VA into the lo/hi packet fields */
	largep = (union ULARGE_INTEGER *) &vmid0_address;

	ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2;
	ib_packet->bitfields3.ib_base_hi = largep->u.high_part;

	/*
	 * NOTE(review): bits 23 and 31 are presumably the chain/valid
	 * control flags of the IB packet; low 20 bits are the IB size in
	 * dwords -- confirm against the PM4 packet spec.
	 */
	ib_packet->control = (1 << 23) | (1 << 31) |
			((size_in_bytes / sizeof(uint32_t)) & 0xfffff);

	ib_packet->bitfields5.pasid = pasid;

	/*
	 * for now we use release mem for GPU-CPU synchronization
	 * Consider WaitRegMem + WriteData as a better alternative
	 * we get a GART allocations ( gpu/cpu mapping),
	 * for the sync variable, and wait until:
	 * (a) Sync with HW
	 * (b) Sync var is written by CP to mem.
	 */
	rm_packet = (struct pm4__release_mem *) (ib_packet_buff +
			(sizeof(struct pm4__indirect_buffer_pasid) /
					sizeof(unsigned int)));

	status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t),
					&mem_obj);

	if (status != 0) {
		pr_err("amdkfd: Failed to allocate GART memory\n");
		/* Give back the already-acquired DIQ space */
		kq->ops.rollback_packet(kq);
		return status;
	}

	rm_state = (uint64_t *) mem_obj->cpu_ptr;

	/* Pre-set to "pending"; CP overwrites with QUEUESTATE__ACTIVE */
	*rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING;

	rm_packet->header.opcode = IT_RELEASE_MEM;
	rm_packet->header.type = PM4_TYPE_3;
	rm_packet->header.count = sizeof(struct pm4__release_mem) /
					sizeof(unsigned int) - 2;

	rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
	rm_packet->bitfields2.event_index =
				event_index___release_mem__end_of_pipe;

	rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
	rm_packet->bitfields2.atc = 0;
	rm_packet->bitfields2.tc_wb_action_ena = 1;

	addr.quad_part = mem_obj->gpu_addr;

	rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2;
	rm_packet->address_hi = addr.u.high_part;

	rm_packet->bitfields3.data_sel =
				data_sel___release_mem__send_64_bit_data;

	rm_packet->bitfields3.int_sel =
			int_sel___release_mem__send_data_after_write_confirm;

	rm_packet->bitfields3.dst_sel =
			dst_sel___release_mem__memory_controller;

	rm_packet->data_lo = QUEUESTATE__ACTIVE;

	kq->ops.submit_packet(kq);

	/* Wait till CP writes sync code: */
	status = amdkfd_fence_wait_timeout(
			(unsigned int *) rm_state,
			QUEUESTATE__ACTIVE, 1500);

	kfd_gtt_sa_free(dbgdev->dev, mem_obj);

	return status;
}
168
Yair Shacharfbeb6612015-05-20 13:48:26 +0300169static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev)
170{
171 BUG_ON(!dbgdev);
172
173 /*
174 * no action is needed in this case,
175 * just make sure diq will not be used
176 */
177
178 dbgdev->kq = NULL;
179
180 return 0;
181}
182
183static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
184{
185 struct queue_properties properties;
186 unsigned int qid;
187 struct kernel_queue *kq = NULL;
188 int status;
189
190 BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->dev);
191
192 status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
193 &properties, 0, KFD_QUEUE_TYPE_DIQ,
194 &qid);
195
196 if (status) {
197 pr_err("amdkfd: Failed to create DIQ\n");
198 return status;
199 }
200
201 pr_debug("DIQ Created with queue id: %d\n", qid);
202
203 kq = pqm_get_kernel_queue(dbgdev->pqm, qid);
204
205 if (kq == NULL) {
206 pr_err("amdkfd: Error getting DIQ\n");
207 pqm_destroy_queue(dbgdev->pqm, qid);
208 return -EFAULT;
209 }
210
211 dbgdev->kq = kq;
212
213 return status;
214}
215
216static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev)
217{
218 BUG_ON(!dbgdev || !dbgdev->dev);
219
220 /* disable watch address */
221 dbgdev_address_watch_disable_nodiq(dbgdev->dev);
222 return 0;
223}
224
225static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev)
226{
227 /* todo - disable address watch */
228 int status;
229
230 BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->kq);
231
232 status = pqm_destroy_queue(dbgdev->pqm,
233 dbgdev->kq->queue->properties.queue_id);
234 dbgdev->kq = NULL;
235
236 return status;
237}
238
Yair Shachar788bf832015-05-20 13:58:12 +0300239static int dbgdev_wave_control_set_registers(
240 struct dbg_wave_control_info *wac_info,
241 union SQ_CMD_BITS *in_reg_sq_cmd,
242 union GRBM_GFX_INDEX_BITS *in_reg_gfx_index)
243{
244 int status;
245 union SQ_CMD_BITS reg_sq_cmd;
246 union GRBM_GFX_INDEX_BITS reg_gfx_index;
247 struct HsaDbgWaveMsgAMDGen2 *pMsg;
248
249 BUG_ON(!wac_info || !in_reg_sq_cmd || !in_reg_gfx_index);
250
251 reg_sq_cmd.u32All = 0;
252 reg_gfx_index.u32All = 0;
253 pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2;
254
255 switch (wac_info->mode) {
256 /* Send command to single wave */
257 case HSA_DBG_WAVEMODE_SINGLE:
258 /*
259 * Limit access to the process waves only,
260 * by setting vmid check
261 */
262 reg_sq_cmd.bits.check_vmid = 1;
263 reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD;
264 reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId;
265 reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE;
266
267 reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
268 reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
269 reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
270
271 break;
272
273 /* Send command to all waves with matching VMID */
274 case HSA_DBG_WAVEMODE_BROADCAST_PROCESS:
275
276 reg_gfx_index.bits.sh_broadcast_writes = 1;
277 reg_gfx_index.bits.se_broadcast_writes = 1;
278 reg_gfx_index.bits.instance_broadcast_writes = 1;
279
280 reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
281
282 break;
283
284 /* Send command to all CU waves with matching VMID */
285 case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU:
286
287 reg_sq_cmd.bits.check_vmid = 1;
288 reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
289
290 reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
291 reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
292 reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
293
294 break;
295
296 default:
297 return -EINVAL;
298 }
299
300 switch (wac_info->operand) {
301 case HSA_DBG_WAVEOP_HALT:
302 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT;
303 break;
304
305 case HSA_DBG_WAVEOP_RESUME:
306 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME;
307 break;
308
309 case HSA_DBG_WAVEOP_KILL:
310 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
311 break;
312
313 case HSA_DBG_WAVEOP_DEBUG:
314 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG;
315 break;
316
317 case HSA_DBG_WAVEOP_TRAP:
318 if (wac_info->trapId < MAX_TRAPID) {
319 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP;
320 reg_sq_cmd.bits.trap_id = wac_info->trapId;
321 } else {
322 status = -EINVAL;
323 }
324 break;
325
326 default:
327 status = -EINVAL;
328 break;
329 }
330
331 if (status == 0) {
332 *in_reg_sq_cmd = reg_sq_cmd;
333 *in_reg_gfx_index = reg_gfx_index;
334 }
335
336 return status;
337}
338
/*
 * Execute a wave-control command through the DIQ.
 *
 * Builds a three-packet IB: (1) SET_UCONFIG_REG selecting the target
 * shader engine/array/CU via GRBM_GFX_INDEX, (2) SET_CONFIG_REG writing
 * the command to SQ_CMD with the VMID inserted by the CP, (3) a second
 * GRBM_GFX_INDEX write restoring broadcast mode.  The IB is submitted
 * with dbgdev_diq_submit_ib() under the target process' PASID.
 *
 * Returns 0 on success or a negative error code from register setup,
 * GART allocation, or IB submission.
 */
static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
					struct dbg_wave_control_info *wac_info)
{

	int status;
	union SQ_CMD_BITS reg_sq_cmd;
	/* Filled in by dbgdev_wave_control_set_registers() below */
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct kfd_mem_obj *mem_obj;
	uint32_t *packet_buff_uint;
	struct pm4__set_config_reg *packets_vec;
	size_t ib_size = sizeof(struct pm4__set_config_reg) * 3;

	BUG_ON(!dbgdev || !wac_info);

	reg_sq_cmd.u32All = 0;

	status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
							&reg_gfx_index);
	if (status) {
		pr_err("amdkfd: Failed to set wave control registers\n");
		return status;
	}

	/* we do not control the VMID in DIQ,so reset it to a known value */
	reg_sq_cmd.bits.vm_id = 0;

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	pr_debug("\t\t mode is: %u\n", wac_info->mode);
	pr_debug("\t\t operand is: %u\n", wac_info->operand);
	pr_debug("\t\t trap id is: %u\n", wac_info->trapId);
	pr_debug("\t\t msg value is: %u\n",
			wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
	pr_debug("\t\t vmid is: N/A\n");

	pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid);
	pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd);
	pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id);
	pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id);
	pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode);
	pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id);
	pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id);

	pr_debug("\t\t ibw is : %u\n",
			reg_gfx_index.bitfields.instance_broadcast_writes);
	pr_debug("\t\t ii is : %u\n",
			reg_gfx_index.bitfields.instance_index);
	pr_debug("\t\t sebw is : %u\n",
			reg_gfx_index.bitfields.se_broadcast_writes);
	pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index);
	pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index);
	pr_debug("\t\t sbw is : %u\n",
			reg_gfx_index.bitfields.sh_broadcast_writes);

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	/* GART memory holds the IB the DIQ will fetch */
	status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);

	if (status != 0) {
		pr_err("amdkfd: Failed to allocate GART memory\n");
		return status;
	}

	packet_buff_uint = mem_obj->cpu_ptr;

	memset(packet_buff_uint, 0, ib_size);

	packets_vec = (struct pm4__set_config_reg *) packet_buff_uint;

	/* Packet 0: point GRBM_GFX_INDEX at the requested SE/SH/CU */
	packets_vec[0].header.count = 1;
	packets_vec[0].header.opcode = IT_SET_UCONFIG_REG;
	packets_vec[0].header.type = PM4_TYPE_3;
	packets_vec[0].bitfields2.reg_offset =
			GRBM_GFX_INDEX / (sizeof(uint32_t)) -
				USERCONFIG_REG_BASE;

	packets_vec[0].bitfields2.insert_vmid = 0;
	packets_vec[0].reg_data[0] = reg_gfx_index.u32All;

	/* Packet 1: write the wave command to SQ_CMD, CP inserts the VMID */
	packets_vec[1].header.count = 1;
	packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
	packets_vec[1].header.type = PM4_TYPE_3;
	packets_vec[1].bitfields2.reg_offset = SQ_CMD / (sizeof(uint32_t)) -
						CONFIG_REG_BASE;

	packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
	packets_vec[1].bitfields2.insert_vmid = 1;
	packets_vec[1].reg_data[0] = reg_sq_cmd.u32All;

	/* Restore the GRBM_GFX_INDEX register */

	reg_gfx_index.u32All = 0;
	reg_gfx_index.bits.sh_broadcast_writes = 1;
	reg_gfx_index.bits.instance_broadcast_writes = 1;
	reg_gfx_index.bits.se_broadcast_writes = 1;


	/* Packet 2: same header as packet 0, broadcast selection restored */
	packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[2].bitfields2.reg_offset =
			GRBM_GFX_INDEX / (sizeof(uint32_t)) -
				USERCONFIG_REG_BASE;

	packets_vec[2].bitfields2.insert_vmid = 0;
	packets_vec[2].reg_data[0] = reg_gfx_index.u32All;

	status = dbgdev_diq_submit_ib(
			dbgdev,
			wac_info->process->pasid,
			mem_obj->gpu_addr,
			packet_buff_uint,
			ib_size);

	if (status != 0)
		pr_err("amdkfd: Failed to submit IB to DIQ\n");

	kfd_gtt_sa_free(dbgdev->dev, mem_obj);

	return status;
}
457
/*
 * Execute a wave-control command without a DIQ, by calling straight into
 * the kfd2kgd wave_control_execute hook.
 *
 * Unlike the DIQ path (where the CP inserts the VMID), here the VMID of
 * the target process must be patched into SQ_CMD explicitly, taken from
 * the per-process device data.
 *
 * Returns 0 on success, -EFAULT if no pdd exists for the process, or the
 * error from register setup / the kgd call.
 */
static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev,
					struct dbg_wave_control_info *wac_info)
{
	int status;
	union SQ_CMD_BITS reg_sq_cmd;
	/* Filled in by dbgdev_wave_control_set_registers() below */
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct kfd_process_device *pdd;

	BUG_ON(!dbgdev || !dbgdev->dev || !wac_info);

	reg_sq_cmd.u32All = 0;

	/* taking the VMID for that process on the safe way using PDD */
	pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process);

	if (!pdd) {
		pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n");
		return -EFAULT;
	}
	status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
							&reg_gfx_index);
	if (status) {
		pr_err("amdkfd: Failed to set wave control registers\n");
		return status;
	}

	/* for non DIQ we need to patch the VMID: */

	reg_sq_cmd.bits.vm_id = pdd->qpd.vmid;

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	pr_debug("\t\t mode is: %u\n", wac_info->mode);
	pr_debug("\t\t operand is: %u\n", wac_info->operand);
	pr_debug("\t\t trap id is: %u\n", wac_info->trapId);
	pr_debug("\t\t msg value is: %u\n",
			wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
	pr_debug("\t\t vmid is: %u\n", pdd->qpd.vmid);

	pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid);
	pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd);
	pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id);
	pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id);
	pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode);
	pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id);
	pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id);

	pr_debug("\t\t ibw is : %u\n",
			reg_gfx_index.bitfields.instance_broadcast_writes);
	pr_debug("\t\t ii is : %u\n",
			reg_gfx_index.bitfields.instance_index);
	pr_debug("\t\t sebw is : %u\n",
			reg_gfx_index.bitfields.se_broadcast_writes);
	pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index);
	pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index);
	pr_debug("\t\t sbw is : %u\n",
			reg_gfx_index.bitfields.sh_broadcast_writes);

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd,
							reg_gfx_index.u32All,
							reg_sq_cmd.u32All);
}
522
Yair Shacharfbeb6612015-05-20 13:48:26 +0300523void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
524 enum DBGDEV_TYPE type)
525{
526 BUG_ON(!pdbgdev || !pdev);
527
528 pdbgdev->dev = pdev;
529 pdbgdev->kq = NULL;
530 pdbgdev->type = type;
531 pdbgdev->pqm = NULL;
532
533 switch (type) {
534 case DBGDEV_TYPE_NODIQ:
535 pdbgdev->dbgdev_register = dbgdev_register_nodiq;
536 pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq;
Yair Shachar788bf832015-05-20 13:58:12 +0300537 pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq;
Yair Shacharfbeb6612015-05-20 13:48:26 +0300538 break;
539 case DBGDEV_TYPE_DIQ:
540 default:
541 pdbgdev->dbgdev_register = dbgdev_register_diq;
542 pdbgdev->dbgdev_unregister = dbgdev_unregister_diq;
Yair Shachar788bf832015-05-20 13:58:12 +0300543 pdbgdev->dbgdev_wave_control = dbgdev_wave_control_diq;
Yair Shacharfbeb6612015-05-20 13:48:26 +0300544 break;
545 }
546
547}