/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/device.h>

#include "kfd_pm4_headers.h"
#include "kfd_pm4_headers_diq.h"
#include "kfd_kernel_queue.h"
#include "kfd_priv.h"
#include "kfd_pm4_opcodes.h"
#include "cik_regs.h"
#include "kfd_dbgmgr.h"
#include "kfd_dbgdev.h"
#include "kfd_device_queue_manager.h"
#include "../../radeon/cik_reg.h"

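/*
 * Non-DIQ helper: clear all address-watch points by calling directly into
 * the kfd2kgd interface, without going through a debug queue.
 */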
static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev)
{
	BUG_ON(!dev || !dev->kfd2kgd);

	dev->kfd2kgd->address_watch_disable(dev->kgd);
}

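/*
 * Submit an indirect buffer (IB) of PM4 packets to the Debug Interface
 * Queue (DIQ) and wait for completion. The caller's packets are referenced
 * by an INDIRECT_BUFFER_PASID packet, followed by a RELEASE_MEM packet that
 * writes a fence value into a small GART allocation, which is then polled
 * with a timeout.
 */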
static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
				unsigned int pasid, uint64_t vmid0_address,
				uint32_t *packet_buff, size_t size_in_bytes)
{
	struct pm4__release_mem *rm_packet;
	struct pm4__indirect_buffer_pasid *ib_packet;
	struct kfd_mem_obj *mem_obj;
	size_t pq_packets_size_in_bytes;
	union ULARGE_INTEGER *largep;
	union ULARGE_INTEGER addr;
	struct kernel_queue *kq;
	uint64_t *rm_state;
	unsigned int *ib_packet_buff;
	int status;

	BUG_ON(!dbgdev || !dbgdev->kq || !packet_buff || !size_in_bytes);

	kq = dbgdev->kq;

	pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) +
				sizeof(struct pm4__indirect_buffer_pasid);

	/*
	 * We acquire a buffer on the DIQ.
	 * The caller's packets sit in the indirect buffer, while the PQ
	 * itself carries only the IB packet plus the sync packet(s).
	 */
	status = kq->ops.acquire_packet_buffer(kq,
				pq_packets_size_in_bytes / sizeof(uint32_t),
				&ib_packet_buff);
	if (status != 0) {
		pr_err("amdkfd: acquire_packet_buffer failed\n");
		return status;
	}

	memset(ib_packet_buff, 0, pq_packets_size_in_bytes);

	ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff);

	ib_packet->header.count = 3;
	ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID;
	ib_packet->header.type = PM4_TYPE_3;

	largep = (union ULARGE_INTEGER *) &vmid0_address;

	ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2;
	ib_packet->bitfields3.ib_base_hi = largep->u.high_part;

	ib_packet->control = (1 << 23) | (1 << 31) |
			((size_in_bytes / sizeof(uint32_t)) & 0xfffff);

	ib_packet->bitfields5.pasid = pasid;

	/*
	 * For now we use RELEASE_MEM for GPU-CPU synchronization
	 * (WAIT_REG_MEM + WRITE_DATA may be a better alternative).
	 * We get a GART allocation (GPU/CPU mapping) for the sync
	 * variable and wait until:
	 * (a) the hardware has flushed, and
	 * (b) the sync variable has been written to memory by the CP.
	 */
	rm_packet = (struct pm4__release_mem *) (ib_packet_buff +
			(sizeof(struct pm4__indirect_buffer_pasid) /
					sizeof(unsigned int)));

	status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t),
					&mem_obj);

	if (status != 0) {
		pr_err("amdkfd: Failed to allocate GART memory\n");
		kq->ops.rollback_packet(kq);
		return status;
	}

	rm_state = (uint64_t *) mem_obj->cpu_ptr;

	*rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING;

	rm_packet->header.opcode = IT_RELEASE_MEM;
	rm_packet->header.type = PM4_TYPE_3;
	rm_packet->header.count = sizeof(struct pm4__release_mem) /
					sizeof(unsigned int) - 2;

	rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
	rm_packet->bitfields2.event_index =
				event_index___release_mem__end_of_pipe;

	rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
	rm_packet->bitfields2.atc = 0;
	rm_packet->bitfields2.tc_wb_action_ena = 1;

	addr.quad_part = mem_obj->gpu_addr;

	rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2;
	rm_packet->address_hi = addr.u.high_part;

	rm_packet->bitfields3.data_sel =
				data_sel___release_mem__send_64_bit_data;

	rm_packet->bitfields3.int_sel =
			int_sel___release_mem__send_data_after_write_confirm;

	rm_packet->bitfields3.dst_sel =
			dst_sel___release_mem__memory_controller;

	rm_packet->data_lo = QUEUESTATE__ACTIVE;

	kq->ops.submit_packet(kq);

	/* Wait till CP writes sync code: */
	status = amdkfd_fence_wait_timeout(
			(unsigned int *) rm_state,
			QUEUESTATE__ACTIVE, 1500);

	kfd_gtt_sa_free(dbgdev->dev, mem_obj);

	return status;
}

static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev)
{
	BUG_ON(!dbgdev);

	/*
	 * No action is needed in this case;
	 * just make sure the DIQ will not be used.
	 */

	dbgdev->kq = NULL;

	return 0;
}

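/*
 * DIQ registration: create a Debug Interface Queue through the process
 * queue manager (PQM) and keep a pointer to its kernel queue for later
 * packet submission.
 */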
static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
{
	struct queue_properties properties;
	unsigned int qid;
	struct kernel_queue *kq = NULL;
	int status;

	BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->dev);

	status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
				&properties, 0, KFD_QUEUE_TYPE_DIQ,
				&qid);

	if (status) {
		pr_err("amdkfd: Failed to create DIQ\n");
		return status;
	}

	pr_debug("DIQ Created with queue id: %d\n", qid);

	kq = pqm_get_kernel_queue(dbgdev->pqm, qid);

	if (kq == NULL) {
		pr_err("amdkfd: Error getting DIQ\n");
		pqm_destroy_queue(dbgdev->pqm, qid);
		return -EFAULT;
	}

	dbgdev->kq = kq;

	return status;
}

static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev)
{
	BUG_ON(!dbgdev || !dbgdev->dev);

	/* disable watch address */
	dbgdev_address_watch_disable_nodiq(dbgdev->dev);
	return 0;
}

static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev)
{
	/* todo - disable address watch */
	int status;

	BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->kq);

	status = pqm_destroy_queue(dbgdev->pqm,
			dbgdev->kq->queue->properties.queue_id);
	dbgdev->kq = NULL;

	return status;
}

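/*
 * Translate a single watch point from adw_info into TCP_WATCH_ADDR_H/L and
 * TCP_WATCH_CNTL register values. The caller decides how the values are
 * written: directly through kfd2kgd (no DIQ) or via SET_CONFIG_REG packets
 * submitted on the DIQ.
 */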
static void dbgdev_address_watch_set_registers(
			const struct dbg_address_watch_info *adw_info,
			union TCP_WATCH_ADDR_H_BITS *addrHi,
			union TCP_WATCH_ADDR_L_BITS *addrLo,
			union TCP_WATCH_CNTL_BITS *cntl,
			unsigned int index, unsigned int vmid)
{
	union ULARGE_INTEGER addr;

	BUG_ON(!adw_info || !addrHi || !addrLo || !cntl);

	addr.quad_part = 0;
	addrHi->u32All = 0;
	addrLo->u32All = 0;
	cntl->u32All = 0;

	if (adw_info->watch_mask != NULL)
		cntl->bitfields.mask =
			(uint32_t) (adw_info->watch_mask[index] &
					ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK);
	else
		cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;

	addr.quad_part = (unsigned long long) adw_info->watch_address[index];

	addrHi->bitfields.addr = addr.u.high_part &
					ADDRESS_WATCH_REG_ADDHIGH_MASK;
	addrLo->bitfields.addr =
			(addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT);

	cntl->bitfields.mode = adw_info->watch_mode[index];
	cntl->bitfields.vmid = (uint32_t) vmid;
	/* for now assume it is an ATC address */
	cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT;

	pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask);
	pr_debug("\t\t%20s %08x\n", "set reg add high :",
			addrHi->bitfields.addr);
	pr_debug("\t\t%20s %08x\n", "set reg add low :",
			addrLo->bitfields.addr);
}

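/*
 * Program the address-watch registers directly through the kfd2kgd
 * interface, using the VMID taken from the process device data (PDD) of
 * the target process.
 */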
static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
					struct dbg_address_watch_info *adw_info)
{
	union TCP_WATCH_ADDR_H_BITS addrHi;
	union TCP_WATCH_ADDR_L_BITS addrLo;
	union TCP_WATCH_CNTL_BITS cntl;
	struct kfd_process_device *pdd;
	unsigned int i;

	BUG_ON(!dbgdev || !dbgdev->dev || !adw_info);

	/* Retrieve the VMID for this process safely via its PDD. */
	pdd = kfd_get_process_device_data(dbgdev->dev,
					adw_info->process);
	if (!pdd) {
		pr_err("amdkfd: Failed to get pdd for address watch no DIQ\n");
		return -EFAULT;
	}

	addrHi.u32All = 0;
	addrLo.u32All = 0;
	cntl.u32All = 0;

	if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
			(adw_info->num_watch_points == 0)) {
		pr_err("amdkfd: num_watch_points is invalid\n");
		return -EINVAL;
	}

	if ((adw_info->watch_mode == NULL) ||
			(adw_info->watch_address == NULL)) {
		pr_err("amdkfd: adw_info fields are not valid\n");
		return -EINVAL;
	}

	for (i = 0; i < adw_info->num_watch_points; i++) {
		dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo,
						&cntl, i, pdd->qpd.vmid);

		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
		pr_debug("\t\t%20s %08x\n", "register index :", i);
		pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid);
		pr_debug("\t\t%20s %08x\n", "Address Low is :",
				addrLo.bitfields.addr);
		pr_debug("\t\t%20s %08x\n", "Address high is :",
				addrHi.bitfields.addr);
		pr_debug("\t\t%20s %08x\n", "Control Mask is :",
				cntl.bitfields.mask);
		pr_debug("\t\t%20s %08x\n", "Control Mode is :",
				cntl.bitfields.mode);
		pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
				cntl.bitfields.vmid);
		pr_debug("\t\t%20s %08x\n", "Control atc is :",
				cntl.bitfields.atc);
		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");

		pdd->dev->kfd2kgd->address_watch_execute(
					dbgdev->dev->kgd,
					i,
					cntl.u32All,
					addrHi.u32All,
					addrLo.u32All);
	}

	return 0;
}

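/*
 * Program the address-watch registers through the DIQ: for each watch
 * point, build an IB of four SET_CONFIG_REG packets (control, address
 * high, address low, then control again with the valid bit resolved) and
 * submit it with the target process' PASID so the CP can insert the
 * proper VMID.
 */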
static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
					struct dbg_address_watch_info *adw_info)
{
	struct pm4__set_config_reg *packets_vec;
	union TCP_WATCH_ADDR_H_BITS addrHi;
	union TCP_WATCH_ADDR_L_BITS addrLo;
	union TCP_WATCH_CNTL_BITS cntl;
	struct kfd_mem_obj *mem_obj;
	unsigned int aw_reg_add_dword;
	uint32_t *packet_buff_uint;
	unsigned int i;
	int status;
	size_t ib_size = sizeof(struct pm4__set_config_reg) * 4;
	/* we do not control the VMID in DIQ mode, just a placeholder */
	unsigned int vmid = 0;

	BUG_ON(!dbgdev || !dbgdev->dev || !adw_info);

	addrHi.u32All = 0;
	addrLo.u32All = 0;
	cntl.u32All = 0;

	if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
			(adw_info->num_watch_points == 0)) {
		pr_err("amdkfd: num_watch_points is invalid\n");
		return -EINVAL;
	}

	if ((adw_info->watch_mode == NULL) ||
			(adw_info->watch_address == NULL)) {
		pr_err("amdkfd: adw_info fields are not valid\n");
		return -EINVAL;
	}

	status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);

	if (status != 0) {
		pr_err("amdkfd: Failed to allocate GART memory\n");
		return status;
	}

	packet_buff_uint = mem_obj->cpu_ptr;

	memset(packet_buff_uint, 0, ib_size);

	packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint);

	packets_vec[0].header.count = 1;
	packets_vec[0].header.opcode = IT_SET_CONFIG_REG;
	packets_vec[0].header.type = PM4_TYPE_3;
	packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
	packets_vec[0].bitfields2.insert_vmid = 1;
	packets_vec[1].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[1].bitfields2.insert_vmid = 0;
	packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[2].bitfields2.insert_vmid = 0;
	packets_vec[3].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
	packets_vec[3].bitfields2.insert_vmid = 1;

	for (i = 0; i < adw_info->num_watch_points; i++) {
		dbgdev_address_watch_set_registers(adw_info,
						&addrHi,
						&addrLo,
						&cntl,
						i,
						vmid);

		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
		pr_debug("\t\t%20s %08x\n", "register index :", i);
		pr_debug("\t\t%20s %08x\n", "vmid is :", vmid);
		pr_debug("\t\t%20s %p\n", "Add ptr is :",
				adw_info->watch_address);
		pr_debug("\t\t%20s %08llx\n", "Add is :",
				adw_info->watch_address[i]);
		pr_debug("\t\t%20s %08x\n", "Address Low is :",
				addrLo.bitfields.addr);
		pr_debug("\t\t%20s %08x\n", "Address high is :",
				addrHi.bitfields.addr);
		pr_debug("\t\t%20s %08x\n", "Control Mask is :",
				cntl.bitfields.mask);
		pr_debug("\t\t%20s %08x\n", "Control Mode is :",
				cntl.bitfields.mode);
		pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
				cntl.bitfields.vmid);
		pr_debug("\t\t%20s %08x\n", "Control atc is :",
				cntl.bitfields.atc);
		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");

		aw_reg_add_dword =
			dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_CNTL);

		aw_reg_add_dword /= sizeof(uint32_t);

		packets_vec[0].bitfields2.reg_offset =
					aw_reg_add_dword - CONFIG_REG_BASE;

		packets_vec[0].reg_data[0] = cntl.u32All;

		aw_reg_add_dword =
			dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_ADDR_HI);

		aw_reg_add_dword /= sizeof(uint32_t);

		packets_vec[1].bitfields2.reg_offset =
					aw_reg_add_dword - CONFIG_REG_BASE;
		packets_vec[1].reg_data[0] = addrHi.u32All;

		aw_reg_add_dword =
			dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_ADDR_LO);

		aw_reg_add_dword /= sizeof(uint32_t);

		packets_vec[2].bitfields2.reg_offset =
					aw_reg_add_dword - CONFIG_REG_BASE;
		packets_vec[2].reg_data[0] = addrLo.u32All;

		/* Enable the watch flag if the address is not zero. */
		if (adw_info->watch_address[i] > 0)
			cntl.bitfields.valid = 1;
		else
			cntl.bitfields.valid = 0;

		aw_reg_add_dword =
			dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_CNTL);

		aw_reg_add_dword /= sizeof(uint32_t);

		packets_vec[3].bitfields2.reg_offset =
					aw_reg_add_dword - CONFIG_REG_BASE;
		packets_vec[3].reg_data[0] = cntl.u32All;

		status = dbgdev_diq_submit_ib(
					dbgdev,
					adw_info->process->pasid,
					mem_obj->gpu_addr,
					packet_buff_uint,
					ib_size);

		if (status != 0) {
			pr_err("amdkfd: Failed to submit IB to DIQ\n");
			break;
		}
	}

	kfd_gtt_sa_free(dbgdev->dev, mem_obj);
	return status;
}

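/*
 * Convert a wave-control request (mode + operand) into SQ_CMD and
 * GRBM_GFX_INDEX register values. The VMID field is left to the caller:
 * it is patched from the PDD in the no-DIQ path, or inserted by the CP in
 * the DIQ path.
 */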
static int dbgdev_wave_control_set_registers(
				struct dbg_wave_control_info *wac_info,
				union SQ_CMD_BITS *in_reg_sq_cmd,
				union GRBM_GFX_INDEX_BITS *in_reg_gfx_index)
{
	int status = 0;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct HsaDbgWaveMsgAMDGen2 *pMsg;

	BUG_ON(!wac_info || !in_reg_sq_cmd || !in_reg_gfx_index);

	reg_sq_cmd.u32All = 0;
	reg_gfx_index.u32All = 0;
	pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2;

	switch (wac_info->mode) {
	/* Send command to a single wave. */
	case HSA_DBG_WAVEMODE_SINGLE:
		/*
		 * Limit access to the process' own waves
		 * by enabling the VMID check.
		 */
		reg_sq_cmd.bits.check_vmid = 1;
		reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD;
		reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId;
		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE;

		reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
		reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
		reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;

		break;

	/* Send command to all waves with a matching VMID. */
	case HSA_DBG_WAVEMODE_BROADCAST_PROCESS:

		reg_gfx_index.bits.sh_broadcast_writes = 1;
		reg_gfx_index.bits.se_broadcast_writes = 1;
		reg_gfx_index.bits.instance_broadcast_writes = 1;

		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;

		break;

	/* Send command to all CU waves with a matching VMID. */
	case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU:

		reg_sq_cmd.bits.check_vmid = 1;
		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;

		reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
		reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
		reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;

		break;

	default:
		return -EINVAL;
	}

	switch (wac_info->operand) {
	case HSA_DBG_WAVEOP_HALT:
		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT;
		break;

	case HSA_DBG_WAVEOP_RESUME:
		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME;
		break;

	case HSA_DBG_WAVEOP_KILL:
		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
		break;

	case HSA_DBG_WAVEOP_DEBUG:
		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG;
		break;

	case HSA_DBG_WAVEOP_TRAP:
		if (wac_info->trapId < MAX_TRAPID) {
			reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP;
			reg_sq_cmd.bits.trap_id = wac_info->trapId;
		} else {
			status = -EINVAL;
		}
		break;

	default:
		status = -EINVAL;
		break;
	}

	if (status == 0) {
		*in_reg_sq_cmd = reg_sq_cmd;
		*in_reg_gfx_index = reg_gfx_index;
	}

	return status;
}

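/*
 * Issue a wave-control command through the DIQ: set GRBM_GFX_INDEX, write
 * SQ_CMD (with the VMID inserted by the CP), then restore GRBM_GFX_INDEX
 * to broadcast mode, all packed into a single IB.
 */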
static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
					struct dbg_wave_control_info *wac_info)
{
	int status;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct kfd_mem_obj *mem_obj;
	uint32_t *packet_buff_uint;
	struct pm4__set_config_reg *packets_vec;
	size_t ib_size = sizeof(struct pm4__set_config_reg) * 3;

	BUG_ON(!dbgdev || !wac_info);

	reg_sq_cmd.u32All = 0;

	status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
							&reg_gfx_index);
	if (status) {
		pr_err("amdkfd: Failed to set wave control registers\n");
		return status;
	}

	/* We do not control the VMID in DIQ, so reset it to a known value. */
	reg_sq_cmd.bits.vm_id = 0;

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	pr_debug("\t\t mode is: %u\n", wac_info->mode);
	pr_debug("\t\t operand is: %u\n", wac_info->operand);
	pr_debug("\t\t trap id is: %u\n", wac_info->trapId);
	pr_debug("\t\t msg value is: %u\n",
			wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
	pr_debug("\t\t vmid is: N/A\n");

	pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid);
	pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd);
	pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id);
	pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id);
	pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode);
	pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id);
	pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id);

	pr_debug("\t\t ibw is : %u\n",
			reg_gfx_index.bitfields.instance_broadcast_writes);
	pr_debug("\t\t ii is : %u\n",
			reg_gfx_index.bitfields.instance_index);
	pr_debug("\t\t sebw is : %u\n",
			reg_gfx_index.bitfields.se_broadcast_writes);
	pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index);
	pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index);
	pr_debug("\t\t sbw is : %u\n",
			reg_gfx_index.bitfields.sh_broadcast_writes);

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);

	if (status != 0) {
		pr_err("amdkfd: Failed to allocate GART memory\n");
		return status;
	}

	packet_buff_uint = mem_obj->cpu_ptr;

	memset(packet_buff_uint, 0, ib_size);

	packets_vec = (struct pm4__set_config_reg *) packet_buff_uint;
	packets_vec[0].header.count = 1;
	packets_vec[0].header.opcode = IT_SET_UCONFIG_REG;
	packets_vec[0].header.type = PM4_TYPE_3;
	packets_vec[0].bitfields2.reg_offset =
			GRBM_GFX_INDEX / (sizeof(uint32_t)) -
				USERCONFIG_REG_BASE;

	packets_vec[0].bitfields2.insert_vmid = 0;
	packets_vec[0].reg_data[0] = reg_gfx_index.u32All;

	packets_vec[1].header.count = 1;
	packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
	packets_vec[1].header.type = PM4_TYPE_3;
	packets_vec[1].bitfields2.reg_offset = SQ_CMD / (sizeof(uint32_t)) -
						CONFIG_REG_BASE;

	packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
	packets_vec[1].bitfields2.insert_vmid = 1;
	packets_vec[1].reg_data[0] = reg_sq_cmd.u32All;

	/* Restore the GRBM_GFX_INDEX register. */

	reg_gfx_index.u32All = 0;
	reg_gfx_index.bits.sh_broadcast_writes = 1;
	reg_gfx_index.bits.instance_broadcast_writes = 1;
	reg_gfx_index.bits.se_broadcast_writes = 1;

	packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[2].bitfields2.reg_offset =
			GRBM_GFX_INDEX / (sizeof(uint32_t)) -
				USERCONFIG_REG_BASE;

	packets_vec[2].bitfields2.insert_vmid = 0;
	packets_vec[2].reg_data[0] = reg_gfx_index.u32All;

	status = dbgdev_diq_submit_ib(
			dbgdev,
			wac_info->process->pasid,
			mem_obj->gpu_addr,
			packet_buff_uint,
			ib_size);

	if (status != 0)
		pr_err("amdkfd: Failed to submit IB to DIQ\n");

	kfd_gtt_sa_free(dbgdev->dev, mem_obj);

	return status;
}

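/*
 * Issue a wave-control command directly through the kfd2kgd interface.
 * There is no CP to insert the VMID here, so it is patched into SQ_CMD
 * from the process device data.
 */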
static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev,
					struct dbg_wave_control_info *wac_info)
{
	int status;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct kfd_process_device *pdd;

	BUG_ON(!dbgdev || !dbgdev->dev || !wac_info);

	reg_sq_cmd.u32All = 0;

	/* Retrieve the VMID for this process safely via its PDD. */
	pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process);

	if (!pdd) {
		pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n");
		return -EFAULT;
	}
	status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
							&reg_gfx_index);
	if (status) {
		pr_err("amdkfd: Failed to set wave control registers\n");
		return status;
	}

	/* For the non-DIQ path we need to patch the VMID: */

	reg_sq_cmd.bits.vm_id = pdd->qpd.vmid;

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	pr_debug("\t\t mode is: %u\n", wac_info->mode);
	pr_debug("\t\t operand is: %u\n", wac_info->operand);
	pr_debug("\t\t trap id is: %u\n", wac_info->trapId);
	pr_debug("\t\t msg value is: %u\n",
			wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
	pr_debug("\t\t vmid is: %u\n", pdd->qpd.vmid);

	pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid);
	pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd);
	pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id);
	pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id);
	pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode);
	pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id);
	pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id);

	pr_debug("\t\t ibw is : %u\n",
			reg_gfx_index.bitfields.instance_broadcast_writes);
	pr_debug("\t\t ii is : %u\n",
			reg_gfx_index.bitfields.instance_index);
	pr_debug("\t\t sebw is : %u\n",
			reg_gfx_index.bitfields.se_broadcast_writes);
	pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index);
	pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index);
	pr_debug("\t\t sbw is : %u\n",
			reg_gfx_index.bitfields.sh_broadcast_writes);

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd,
							reg_gfx_index.u32All,
							reg_sq_cmd.u32All);
}

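/*
 * Kill all wavefronts of process @p on device @dev: find the VMID the
 * process is mapped to by scanning the ATC VMID<->PASID mapping registers
 * of the compute VMIDs, then execute a broadcast KILL wave command.
 */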
int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
{
	int status = 0;
	unsigned int vmid;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct kfd_process_device *pdd;
	struct dbg_wave_control_info wac_info;
	int temp;
	int first_vmid_to_scan = 8;
	int last_vmid_to_scan = 15;

	first_vmid_to_scan = ffs(dev->shared_resources.compute_vmid_bitmap) - 1;
	temp = dev->shared_resources.compute_vmid_bitmap >> first_vmid_to_scan;
	last_vmid_to_scan = first_vmid_to_scan + ffz(temp);

	reg_sq_cmd.u32All = 0;
	status = 0;

	wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS;
	wac_info.operand = HSA_DBG_WAVEOP_KILL;

	pr_debug("Killing all process wavefronts\n");

	/*
	 * Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
	 * ATC_VMID15_PASID_MAPPING to check which VMID the current
	 * process is mapped to.
	 */

	for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
		if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid
				(dev->kgd, vmid)) {
			if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid
					(dev->kgd, vmid) == p->pasid) {
				pr_debug("Killing wave fronts of vmid %d and pasid %d\n",
						vmid, p->pasid);
				break;
			}
		}
	}

	if (vmid > last_vmid_to_scan) {
		pr_err("amdkfd: didn't find vmid for pasid (%d)\n", p->pasid);
		return -EFAULT;
	}

	/* Verify the process is registered on this device via its PDD. */
	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd)
		return -EFAULT;

	status = dbgdev_wave_control_set_registers(&wac_info, &reg_sq_cmd,
							&reg_gfx_index);
	if (status != 0)
		return -EINVAL;

	/* For the non-DIQ path we need to patch the VMID: */
	reg_sq_cmd.bits.vm_id = vmid;

	dev->kfd2kgd->wave_control_execute(dev->kgd,
					reg_gfx_index.u32All,
					reg_sq_cmd.u32All);

	return 0;
}

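/*
 * Initialize a dbgdev instance and bind the register/unregister,
 * wave-control and address-watch callbacks that match the requested debug
 * path (direct register access or DIQ).
 */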
void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
			enum DBGDEV_TYPE type)
{
	BUG_ON(!pdbgdev || !pdev);

	pdbgdev->dev = pdev;
	pdbgdev->kq = NULL;
	pdbgdev->type = type;
	pdbgdev->pqm = NULL;

	switch (type) {
	case DBGDEV_TYPE_NODIQ:
		pdbgdev->dbgdev_register = dbgdev_register_nodiq;
		pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq;
		pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq;
		pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq;
		break;
	case DBGDEV_TYPE_DIQ:
	default:
		pdbgdev->dbgdev_register = dbgdev_register_diq;
		pdbgdev->dbgdev_unregister = dbgdev_unregister_diq;
		pdbgdev->dbgdev_wave_control = dbgdev_wave_control_diq;
		pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq;
		break;
	}
}