drm/radeon: fence BO_VAs manually

This allows us to finally remove the VM fence and
so allow concurrent use of it from different engines.

Signed-off-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 79f5f5b..3207bb6 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -456,6 +456,7 @@
 	struct list_head		bo_list;
 	uint32_t			flags;
 	uint64_t			addr;
+	struct radeon_fence		*last_pt_update;
 	unsigned			ref_count;
 
 	/* protected by vm mutex */
@@ -915,6 +916,8 @@
 };
 
 struct radeon_vm {
+	struct mutex		mutex;
+
 	struct rb_root		va;
 
 	/* BOs moved, but not yet updated in the PT */
@@ -932,10 +935,6 @@
 
 	struct radeon_bo_va	*ib_bo_va;
 
-	struct mutex		mutex;
-	/* last fence for cs using this vm */
-	struct radeon_fence	*fence;
-
 	/* for id and flush management per ring */
 	struct radeon_vm_id	ids[RADEON_NUM_RINGS];
 };
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 30437aa..75f22e5 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -505,6 +505,9 @@
 	if (r)
 		return r;
 
+	radeon_sync_resv(p->rdev, &p->ib.sync, vm->page_directory->tbo.resv,
+			 true);
+
 	r = radeon_vm_clear_freed(rdev, vm);
 	if (r)
 		return r;
@@ -536,6 +539,8 @@
 		r = radeon_vm_bo_update(rdev, bo_va, &bo->tbo.mem);
 		if (r)
 			return r;
+
+		radeon_sync_fence(&p->ib.sync, bo_va->last_pt_update);
 	}
 
 	return radeon_vm_clear_invalids(rdev, vm);
@@ -580,7 +585,6 @@
 			DRM_ERROR("Failed to sync rings: %i\n", r);
 		goto out;
 	}
-	radeon_sync_fence(&parser->ib.sync, vm->fence);
 
 	if ((rdev->family >= CHIP_TAHITI) &&
 	    (parser->chunk_const_ib_idx != -1)) {
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index e38efe4..f457614 100644
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -275,9 +275,6 @@
 {
 	unsigned vm_id = vm->ids[fence->ring].id;
 
-	radeon_fence_unref(&vm->fence);
-	vm->fence = radeon_fence_ref(fence);
-
 	radeon_fence_unref(&rdev->vm_manager.active[vm_id]);
 	rdev->vm_manager.active[vm_id] = radeon_fence_ref(fence);
 
@@ -707,8 +704,6 @@
 		}
 		ib.fence->is_vm_update = true;
 		radeon_bo_fence(pd, ib.fence, false);
-		radeon_fence_unref(&vm->fence);
-		vm->fence = radeon_fence_ref(ib.fence);
 	}
 	radeon_ib_free(rdev, &ib);
 
@@ -999,8 +994,8 @@
 	}
 	ib.fence->is_vm_update = true;
 	radeon_vm_fence_pts(vm, bo_va->it.start, bo_va->it.last + 1, ib.fence);
-	radeon_fence_unref(&vm->fence);
-	vm->fence = radeon_fence_ref(ib.fence);
+	radeon_fence_unref(&bo_va->last_pt_update);
+	bo_va->last_pt_update = radeon_fence_ref(ib.fence);
 	radeon_ib_free(rdev, &ib);
 
 	return 0;
@@ -1026,6 +1021,7 @@
 	list_for_each_entry_safe(bo_va, tmp, &vm->freed, vm_status) {
 		r = radeon_vm_bo_update(rdev, bo_va, NULL);
 		radeon_bo_unref(&bo_va->bo);
+		radeon_fence_unref(&bo_va->last_pt_update);
 		kfree(bo_va);
 		if (r)
 			return r;
@@ -1084,6 +1080,7 @@
 		bo_va->bo = radeon_bo_ref(bo_va->bo);
 		list_add(&bo_va->vm_status, &vm->freed);
 	} else {
+		radeon_fence_unref(&bo_va->last_pt_update);
 		kfree(bo_va);
 	}
 
@@ -1130,8 +1127,6 @@
 	int i, r;
 
 	vm->ib_bo_va = NULL;
-	vm->fence = NULL;
-
 	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
 		vm->ids[i].id = 0;
 		vm->ids[i].flushed_updates = NULL;
@@ -1192,11 +1187,13 @@
 		if (!r) {
 			list_del_init(&bo_va->bo_list);
 			radeon_bo_unreserve(bo_va->bo);
+			radeon_fence_unref(&bo_va->last_pt_update);
 			kfree(bo_va);
 		}
 	}
 	list_for_each_entry_safe(bo_va, tmp, &vm->freed, vm_status) {
 		radeon_bo_unref(&bo_va->bo);
+		radeon_fence_unref(&bo_va->last_pt_update);
 		kfree(bo_va);
 	}
 
@@ -1206,8 +1203,6 @@
 
 	radeon_bo_unref(&vm->page_directory);
 
-	radeon_fence_unref(&vm->fence);
-
 	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
 		radeon_fence_unref(&vm->ids[i].flushed_updates);
 		radeon_fence_unref(&vm->ids[i].last_id_use);