Alex Deucher | d38ceaf | 2015-04-20 16:55:21 -0400 | [diff] [blame] | 1 | /* |
| 2 | * Copyright 2014 Advanced Micro Devices, Inc. |
| 3 | * All Rights Reserved. |
| 4 | * |
| 5 | * Permission is hereby granted, free of charge, to any person obtaining a |
| 6 | * copy of this software and associated documentation files (the |
| 7 | * "Software"), to deal in the Software without restriction, including |
| 8 | * without limitation the rights to use, copy, modify, merge, publish, |
| 9 | * distribute, sub license, and/or sell copies of the Software, and to |
| 10 | * permit persons to whom the Software is furnished to do so, subject to |
| 11 | * the following conditions: |
| 12 | * |
| 13 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 14 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 15 | * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
| 16 | * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, |
| 17 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
| 18 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
| 19 | * USE OR OTHER DEALINGS IN THE SOFTWARE. |
| 20 | * |
| 21 | * The above copyright notice and this permission notice (including the |
| 22 | * next paragraph) shall be included in all copies or substantial portions |
| 23 | * of the Software. |
| 24 | * |
| 25 | */ |
| 26 | /* |
| 27 | * Authors: |
| 28 | * Christian König <christian.koenig@amd.com> |
| 29 | */ |
| 30 | |
| 31 | #include <drm/drmP.h> |
| 32 | #include "amdgpu.h" |
| 33 | #include "amdgpu_trace.h" |
| 34 | |
Christian König | f91b3a6 | 2015-08-20 14:47:40 +0800 | [diff] [blame] | 35 | struct amdgpu_sync_entry { |
| 36 | struct hlist_node node; |
| 37 | struct fence *fence; |
| 38 | }; |
| 39 | |
Alex Deucher | d38ceaf | 2015-04-20 16:55:21 -0400 | [diff] [blame] | 40 | /** |
| 41 | * amdgpu_sync_create - zero init sync object |
| 42 | * |
| 43 | * @sync: sync object to initialize |
| 44 | * |
| 45 | * Just clear the sync object for now. |
| 46 | */ |
| 47 | void amdgpu_sync_create(struct amdgpu_sync *sync) |
| 48 | { |
| 49 | unsigned i; |
| 50 | |
| 51 | for (i = 0; i < AMDGPU_NUM_SYNCS; ++i) |
| 52 | sync->semaphores[i] = NULL; |
| 53 | |
| 54 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) |
| 55 | sync->sync_to[i] = NULL; |
| 56 | |
Christian König | f91b3a6 | 2015-08-20 14:47:40 +0800 | [diff] [blame] | 57 | hash_init(sync->fences); |
Alex Deucher | d38ceaf | 2015-04-20 16:55:21 -0400 | [diff] [blame] | 58 | sync->last_vm_update = NULL; |
| 59 | } |
| 60 | |
Chunming Zhou | 3c62338 | 2015-08-20 18:33:59 +0800 | [diff] [blame] | 61 | static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, struct fence *f) |
| 62 | { |
| 63 | struct amdgpu_fence *a_fence = to_amdgpu_fence(f); |
| 64 | struct amd_sched_fence *s_fence = to_amd_sched_fence(f); |
| 65 | |
| 66 | if (a_fence) |
| 67 | return a_fence->ring->adev == adev; |
| 68 | if (s_fence) |
Christian König | 9b398fa | 2015-09-07 18:16:49 +0200 | [diff] [blame] | 69 | return (struct amdgpu_device *)s_fence->sched->priv == adev; |
Chunming Zhou | 3c62338 | 2015-08-20 18:33:59 +0800 | [diff] [blame] | 70 | return false; |
| 71 | } |
| 72 | |
| 73 | static bool amdgpu_sync_test_owner(struct fence *f, void *owner) |
| 74 | { |
| 75 | struct amdgpu_fence *a_fence = to_amdgpu_fence(f); |
| 76 | struct amd_sched_fence *s_fence = to_amd_sched_fence(f); |
| 77 | if (s_fence) |
| 78 | return s_fence->owner == owner; |
| 79 | if (a_fence) |
| 80 | return a_fence->owner == owner; |
| 81 | return false; |
| 82 | } |
| 83 | |
Alex Deucher | d38ceaf | 2015-04-20 16:55:21 -0400 | [diff] [blame] | 84 | /** |
Christian König | 91e1a52 | 2015-07-06 22:06:40 +0200 | [diff] [blame] | 85 | * amdgpu_sync_fence - remember to sync to this fence |
Alex Deucher | d38ceaf | 2015-04-20 16:55:21 -0400 | [diff] [blame] | 86 | * |
| 87 | * @sync: sync object to add fence to |
| 88 | * @fence: fence to sync to |
| 89 | * |
Alex Deucher | d38ceaf | 2015-04-20 16:55:21 -0400 | [diff] [blame] | 90 | */ |
Christian König | 91e1a52 | 2015-07-06 22:06:40 +0200 | [diff] [blame] | 91 | int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, |
| 92 | struct fence *f) |
Alex Deucher | d38ceaf | 2015-04-20 16:55:21 -0400 | [diff] [blame] | 93 | { |
Christian König | f91b3a6 | 2015-08-20 14:47:40 +0800 | [diff] [blame] | 94 | struct amdgpu_sync_entry *e; |
Christian König | 91e1a52 | 2015-07-06 22:06:40 +0200 | [diff] [blame] | 95 | struct amdgpu_fence *fence; |
Alex Deucher | d38ceaf | 2015-04-20 16:55:21 -0400 | [diff] [blame] | 96 | struct amdgpu_fence *other; |
Chunming Zhou | 3c62338 | 2015-08-20 18:33:59 +0800 | [diff] [blame] | 97 | struct fence *tmp, *later; |
Alex Deucher | d38ceaf | 2015-04-20 16:55:21 -0400 | [diff] [blame] | 98 | |
Christian König | 91e1a52 | 2015-07-06 22:06:40 +0200 | [diff] [blame] | 99 | if (!f) |
| 100 | return 0; |
| 101 | |
Chunming Zhou | 3c62338 | 2015-08-20 18:33:59 +0800 | [diff] [blame] | 102 | if (amdgpu_sync_same_dev(adev, f) && |
| 103 | amdgpu_sync_test_owner(f, AMDGPU_FENCE_OWNER_VM)) { |
| 104 | if (sync->last_vm_update) { |
| 105 | tmp = sync->last_vm_update; |
| 106 | BUG_ON(f->context != tmp->context); |
| 107 | later = (f->seqno - tmp->seqno <= INT_MAX) ? f : tmp; |
| 108 | sync->last_vm_update = fence_get(later); |
| 109 | fence_put(tmp); |
| 110 | } else |
| 111 | sync->last_vm_update = fence_get(f); |
| 112 | } |
| 113 | |
Christian König | 91e1a52 | 2015-07-06 22:06:40 +0200 | [diff] [blame] | 114 | fence = to_amdgpu_fence(f); |
Christian König | f91b3a6 | 2015-08-20 14:47:40 +0800 | [diff] [blame] | 115 | if (!fence || fence->ring->adev != adev) { |
| 116 | hash_for_each_possible(sync->fences, e, node, f->context) { |
| 117 | struct fence *new; |
| 118 | if (unlikely(e->fence->context != f->context)) |
| 119 | continue; |
| 120 | new = fence_get(fence_later(e->fence, f)); |
| 121 | if (new) { |
| 122 | fence_put(e->fence); |
| 123 | e->fence = new; |
| 124 | } |
| 125 | return 0; |
| 126 | } |
| 127 | |
| 128 | e = kmalloc(sizeof(struct amdgpu_sync_entry), GFP_KERNEL); |
| 129 | if (!e) |
| 130 | return -ENOMEM; |
| 131 | |
| 132 | hash_add(sync->fences, &e->node, f->context); |
| 133 | e->fence = fence_get(f); |
| 134 | return 0; |
| 135 | } |
Alex Deucher | d38ceaf | 2015-04-20 16:55:21 -0400 | [diff] [blame] | 136 | |
| 137 | other = sync->sync_to[fence->ring->idx]; |
| 138 | sync->sync_to[fence->ring->idx] = amdgpu_fence_ref( |
| 139 | amdgpu_fence_later(fence, other)); |
| 140 | amdgpu_fence_unref(&other); |
| 141 | |
Christian König | 91e1a52 | 2015-07-06 22:06:40 +0200 | [diff] [blame] | 142 | return 0; |
Alex Deucher | d38ceaf | 2015-04-20 16:55:21 -0400 | [diff] [blame] | 143 | } |
| 144 | |
Chunming Zhou | 423a948 | 2015-08-24 16:59:54 +0800 | [diff] [blame] | 145 | static void *amdgpu_sync_get_owner(struct fence *f) |
| 146 | { |
| 147 | struct amdgpu_fence *a_fence = to_amdgpu_fence(f); |
| 148 | struct amd_sched_fence *s_fence = to_amd_sched_fence(f); |
| 149 | |
| 150 | if (s_fence) |
| 151 | return s_fence->owner; |
| 152 | else if (a_fence) |
| 153 | return a_fence->owner; |
| 154 | return AMDGPU_FENCE_OWNER_UNDEFINED; |
| 155 | } |
| 156 | |
Alex Deucher | d38ceaf | 2015-04-20 16:55:21 -0400 | [diff] [blame] | 157 | /** |
| 158 | * amdgpu_sync_resv - use the semaphores to sync to a reservation object |
| 159 | * |
| 160 | * @sync: sync object to add fences from reservation object to |
| 161 | * @resv: reservation object with embedded fence |
| 162 | * @shared: true if we should only sync to the exclusive fence |
| 163 | * |
| 164 | * Sync to the fence using the semaphore objects |
| 165 | */ |
| 166 | int amdgpu_sync_resv(struct amdgpu_device *adev, |
| 167 | struct amdgpu_sync *sync, |
| 168 | struct reservation_object *resv, |
| 169 | void *owner) |
| 170 | { |
| 171 | struct reservation_object_list *flist; |
| 172 | struct fence *f; |
Chunming Zhou | 423a948 | 2015-08-24 16:59:54 +0800 | [diff] [blame] | 173 | void *fence_owner; |
Alex Deucher | d38ceaf | 2015-04-20 16:55:21 -0400 | [diff] [blame] | 174 | unsigned i; |
| 175 | int r = 0; |
| 176 | |
Jammy Zhou | 4b09530 | 2015-05-12 23:17:19 +0800 | [diff] [blame] | 177 | if (resv == NULL) |
| 178 | return -EINVAL; |
| 179 | |
Alex Deucher | d38ceaf | 2015-04-20 16:55:21 -0400 | [diff] [blame] | 180 | /* always sync to the exclusive fence */ |
| 181 | f = reservation_object_get_excl(resv); |
Christian König | 91e1a52 | 2015-07-06 22:06:40 +0200 | [diff] [blame] | 182 | r = amdgpu_sync_fence(adev, sync, f); |
Alex Deucher | d38ceaf | 2015-04-20 16:55:21 -0400 | [diff] [blame] | 183 | |
| 184 | flist = reservation_object_get_list(resv); |
| 185 | if (!flist || r) |
| 186 | return r; |
| 187 | |
| 188 | for (i = 0; i < flist->shared_count; ++i) { |
| 189 | f = rcu_dereference_protected(flist->shared[i], |
| 190 | reservation_object_held(resv)); |
Chunming Zhou | 423a948 | 2015-08-24 16:59:54 +0800 | [diff] [blame] | 191 | if (amdgpu_sync_same_dev(adev, f)) { |
Christian König | 1d3897e | 2015-07-27 15:40:35 +0200 | [diff] [blame] | 192 | /* VM updates are only interesting |
| 193 | * for other VM updates and moves. |
| 194 | */ |
Chunming Zhou | 423a948 | 2015-08-24 16:59:54 +0800 | [diff] [blame] | 195 | fence_owner = amdgpu_sync_get_owner(f); |
Christian König | 1d3897e | 2015-07-27 15:40:35 +0200 | [diff] [blame] | 196 | if ((owner != AMDGPU_FENCE_OWNER_MOVE) && |
Chunming Zhou | 423a948 | 2015-08-24 16:59:54 +0800 | [diff] [blame] | 197 | (fence_owner != AMDGPU_FENCE_OWNER_MOVE) && |
Christian König | 1d3897e | 2015-07-27 15:40:35 +0200 | [diff] [blame] | 198 | ((owner == AMDGPU_FENCE_OWNER_VM) != |
Chunming Zhou | 423a948 | 2015-08-24 16:59:54 +0800 | [diff] [blame] | 199 | (fence_owner == AMDGPU_FENCE_OWNER_VM))) |
Christian König | 91e1a52 | 2015-07-06 22:06:40 +0200 | [diff] [blame] | 200 | continue; |
| 201 | |
Christian König | 1d3897e | 2015-07-27 15:40:35 +0200 | [diff] [blame] | 202 | /* Ignore fence from the same owner as |
| 203 | * long as it isn't undefined. |
| 204 | */ |
| 205 | if (owner != AMDGPU_FENCE_OWNER_UNDEFINED && |
Chunming Zhou | 423a948 | 2015-08-24 16:59:54 +0800 | [diff] [blame] | 206 | fence_owner == owner) |
Christian König | 1d3897e | 2015-07-27 15:40:35 +0200 | [diff] [blame] | 207 | continue; |
| 208 | } |
| 209 | |
Christian König | 91e1a52 | 2015-07-06 22:06:40 +0200 | [diff] [blame] | 210 | r = amdgpu_sync_fence(adev, sync, f); |
| 211 | if (r) |
| 212 | break; |
Alex Deucher | d38ceaf | 2015-04-20 16:55:21 -0400 | [diff] [blame] | 213 | } |
| 214 | return r; |
| 215 | } |
| 216 | |
Christian König | e61235d | 2015-08-25 11:05:36 +0200 | [diff] [blame] | 217 | struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync) |
| 218 | { |
| 219 | struct amdgpu_sync_entry *e; |
| 220 | struct hlist_node *tmp; |
| 221 | struct fence *f; |
| 222 | int i; |
| 223 | |
| 224 | hash_for_each_safe(sync->fences, i, tmp, e, node) { |
| 225 | |
| 226 | f = e->fence; |
| 227 | |
| 228 | hash_del(&e->node); |
| 229 | kfree(e); |
| 230 | |
| 231 | if (!fence_is_signaled(f)) |
| 232 | return f; |
| 233 | |
| 234 | fence_put(f); |
| 235 | } |
| 236 | return NULL; |
| 237 | } |
| 238 | |
Christian König | f91b3a6 | 2015-08-20 14:47:40 +0800 | [diff] [blame] | 239 | int amdgpu_sync_wait(struct amdgpu_sync *sync) |
| 240 | { |
| 241 | struct amdgpu_sync_entry *e; |
| 242 | struct hlist_node *tmp; |
| 243 | int i, r; |
| 244 | |
| 245 | hash_for_each_safe(sync->fences, i, tmp, e, node) { |
| 246 | r = fence_wait(e->fence, false); |
| 247 | if (r) |
| 248 | return r; |
| 249 | |
| 250 | hash_del(&e->node); |
| 251 | fence_put(e->fence); |
| 252 | kfree(e); |
| 253 | } |
Christian König | 3daea9e3d | 2015-09-05 11:12:27 +0200 | [diff] [blame] | 254 | |
| 255 | if (amdgpu_enable_semaphores) |
| 256 | return 0; |
| 257 | |
| 258 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { |
| 259 | struct amdgpu_fence *fence = sync->sync_to[i]; |
| 260 | if (!fence) |
| 261 | continue; |
| 262 | |
| 263 | r = fence_wait(&fence->base, false); |
| 264 | if (r) |
| 265 | return r; |
| 266 | } |
| 267 | |
Christian König | f91b3a6 | 2015-08-20 14:47:40 +0800 | [diff] [blame] | 268 | return 0; |
| 269 | } |
| 270 | |
Alex Deucher | d38ceaf | 2015-04-20 16:55:21 -0400 | [diff] [blame] | 271 | /** |
| 272 | * amdgpu_sync_rings - sync ring to all registered fences |
| 273 | * |
| 274 | * @sync: sync object to use |
| 275 | * @ring: ring that needs sync |
| 276 | * |
| 277 | * Ensure that all registered fences are signaled before letting |
| 278 | * the ring continue. The caller must hold the ring lock. |
| 279 | */ |
| 280 | int amdgpu_sync_rings(struct amdgpu_sync *sync, |
| 281 | struct amdgpu_ring *ring) |
| 282 | { |
| 283 | struct amdgpu_device *adev = ring->adev; |
| 284 | unsigned count = 0; |
| 285 | int i, r; |
| 286 | |
| 287 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { |
| 288 | struct amdgpu_fence *fence = sync->sync_to[i]; |
| 289 | struct amdgpu_semaphore *semaphore; |
| 290 | struct amdgpu_ring *other = adev->rings[i]; |
| 291 | |
| 292 | /* check if we really need to sync */ |
| 293 | if (!amdgpu_fence_need_sync(fence, ring)) |
| 294 | continue; |
| 295 | |
| 296 | /* prevent GPU deadlocks */ |
| 297 | if (!other->ready) { |
| 298 | dev_err(adev->dev, "Syncing to a disabled ring!"); |
| 299 | return -EINVAL; |
| 300 | } |
| 301 | |
Christian König | 3daea9e3d | 2015-09-05 11:12:27 +0200 | [diff] [blame] | 302 | if (amdgpu_enable_scheduler || !amdgpu_enable_semaphores || |
| 303 | (count >= AMDGPU_NUM_SYNCS)) { |
Alex Deucher | d38ceaf | 2015-04-20 16:55:21 -0400 | [diff] [blame] | 304 | /* not enough room, wait manually */ |
Christian König | 02bc065 | 2015-08-07 13:53:36 +0200 | [diff] [blame] | 305 | r = fence_wait(&fence->base, false); |
Alex Deucher | d38ceaf | 2015-04-20 16:55:21 -0400 | [diff] [blame] | 306 | if (r) |
| 307 | return r; |
| 308 | continue; |
| 309 | } |
| 310 | r = amdgpu_semaphore_create(adev, &semaphore); |
| 311 | if (r) |
| 312 | return r; |
| 313 | |
| 314 | sync->semaphores[count++] = semaphore; |
| 315 | |
| 316 | /* allocate enough space for sync command */ |
| 317 | r = amdgpu_ring_alloc(other, 16); |
| 318 | if (r) |
| 319 | return r; |
| 320 | |
| 321 | /* emit the signal semaphore */ |
| 322 | if (!amdgpu_semaphore_emit_signal(other, semaphore)) { |
| 323 | /* signaling wasn't successful wait manually */ |
| 324 | amdgpu_ring_undo(other); |
Christian König | 02bc065 | 2015-08-07 13:53:36 +0200 | [diff] [blame] | 325 | r = fence_wait(&fence->base, false); |
Alex Deucher | d38ceaf | 2015-04-20 16:55:21 -0400 | [diff] [blame] | 326 | if (r) |
| 327 | return r; |
| 328 | continue; |
| 329 | } |
| 330 | |
| 331 | /* we assume caller has already allocated space on waiters ring */ |
| 332 | if (!amdgpu_semaphore_emit_wait(ring, semaphore)) { |
| 333 | /* waiting wasn't successful wait manually */ |
| 334 | amdgpu_ring_undo(other); |
Christian König | 02bc065 | 2015-08-07 13:53:36 +0200 | [diff] [blame] | 335 | r = fence_wait(&fence->base, false); |
Alex Deucher | d38ceaf | 2015-04-20 16:55:21 -0400 | [diff] [blame] | 336 | if (r) |
| 337 | return r; |
| 338 | continue; |
| 339 | } |
| 340 | |
| 341 | amdgpu_ring_commit(other); |
| 342 | amdgpu_fence_note_sync(fence, ring); |
| 343 | } |
| 344 | |
| 345 | return 0; |
| 346 | } |
| 347 | |
| 348 | /** |
| 349 | * amdgpu_sync_free - free the sync object |
| 350 | * |
| 351 | * @adev: amdgpu_device pointer |
| 352 | * @sync: sync object to use |
| 353 | * @fence: fence to use for the free |
| 354 | * |
| 355 | * Free the sync object by freeing all semaphores in it. |
| 356 | */ |
| 357 | void amdgpu_sync_free(struct amdgpu_device *adev, |
| 358 | struct amdgpu_sync *sync, |
Chunming Zhou | 4ce9891 | 2015-08-19 16:41:19 +0800 | [diff] [blame] | 359 | struct fence *fence) |
Alex Deucher | d38ceaf | 2015-04-20 16:55:21 -0400 | [diff] [blame] | 360 | { |
Christian König | f91b3a6 | 2015-08-20 14:47:40 +0800 | [diff] [blame] | 361 | struct amdgpu_sync_entry *e; |
| 362 | struct hlist_node *tmp; |
Alex Deucher | d38ceaf | 2015-04-20 16:55:21 -0400 | [diff] [blame] | 363 | unsigned i; |
| 364 | |
Christian König | f91b3a6 | 2015-08-20 14:47:40 +0800 | [diff] [blame] | 365 | hash_for_each_safe(sync->fences, i, tmp, e, node) { |
| 366 | hash_del(&e->node); |
| 367 | fence_put(e->fence); |
| 368 | kfree(e); |
| 369 | } |
| 370 | |
Alex Deucher | d38ceaf | 2015-04-20 16:55:21 -0400 | [diff] [blame] | 371 | for (i = 0; i < AMDGPU_NUM_SYNCS; ++i) |
| 372 | amdgpu_semaphore_free(adev, &sync->semaphores[i], fence); |
| 373 | |
| 374 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) |
| 375 | amdgpu_fence_unref(&sync->sync_to[i]); |
| 376 | |
Chunming Zhou | 3c62338 | 2015-08-20 18:33:59 +0800 | [diff] [blame] | 377 | fence_put(sync->last_vm_update); |
Alex Deucher | d38ceaf | 2015-04-20 16:55:21 -0400 | [diff] [blame] | 378 | } |