drm/radeon: track VM update fences separately

Note for each fence if it's a VM page table update or not. This allows
us to determine the last VM update in a sync object and so to figure
out if we need to flush the TLB or not.

Signed-off-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 7cda6d7..61b2eea 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -360,14 +360,15 @@
 };
 
 struct radeon_fence {
-	struct fence base;
+	struct fence		base;
 
-	struct radeon_device		*rdev;
-	uint64_t			seq;
+	struct radeon_device	*rdev;
+	uint64_t		seq;
 	/* RB, DMA, etc. */
-	unsigned			ring;
+	unsigned		ring;
+	bool			is_vm_update;
 
-	wait_queue_t			fence_wake;
+	wait_queue_t		fence_wake;
 };
 
 int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring);
@@ -594,6 +595,7 @@
 struct radeon_sync {
 	struct radeon_semaphore *semaphores[RADEON_NUM_SYNCS];
 	struct radeon_fence	*sync_to[RADEON_NUM_RINGS];
+	struct radeon_fence	*last_vm_update;
 };
 
 void radeon_sync_create(struct radeon_sync *sync);
@@ -926,8 +928,8 @@
 	struct mutex			mutex;
 	/* last fence for cs using this vm */
 	struct radeon_fence		*fence;
-	/* last flush or NULL if we still need to flush */
-	struct radeon_fence		*last_flush;
+	/* last flushed PD/PT update */
+	struct radeon_fence		*flushed_updates;
 	/* last use of vmid */
 	struct radeon_fence		*last_id_use;
 };
@@ -2975,7 +2977,7 @@
 				       struct radeon_vm *vm, int ring);
 void radeon_vm_flush(struct radeon_device *rdev,
                      struct radeon_vm *vm,
-                     int ring);
+		     int ring, struct radeon_fence *fence);
 void radeon_vm_fence(struct radeon_device *rdev,
 		     struct radeon_vm *vm,
 		     struct radeon_fence *fence);
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 9951670..d13d1b5 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -140,6 +140,7 @@
 	(*fence)->rdev = rdev;
 	(*fence)->seq = seq;
 	(*fence)->ring = ring;
+	(*fence)->is_vm_update = false;
 	fence_init(&(*fence)->base, &radeon_fence_ops,
 		   &rdev->fence_queue.lock, rdev->fence_context + ring, seq);
 	radeon_fence_ring_emit(rdev, ring, *fence);
diff --git a/drivers/gpu/drm/radeon/radeon_ib.c b/drivers/gpu/drm/radeon/radeon_ib.c
index 56a1704..c39ce1f 100644
--- a/drivers/gpu/drm/radeon/radeon_ib.c
+++ b/drivers/gpu/drm/radeon/radeon_ib.c
@@ -154,7 +154,8 @@
 	}
 
 	if (ib->vm)
-		radeon_vm_flush(rdev, ib->vm, ib->ring);
+		radeon_vm_flush(rdev, ib->vm, ib->ring,
+				ib->sync.last_vm_update);
 
 	if (const_ib) {
 		radeon_ring_ib_execute(rdev, const_ib->ring, const_ib);
diff --git a/drivers/gpu/drm/radeon/radeon_sync.c b/drivers/gpu/drm/radeon/radeon_sync.c
index ddd88fb..02ac8a1 100644
--- a/drivers/gpu/drm/radeon/radeon_sync.c
+++ b/drivers/gpu/drm/radeon/radeon_sync.c
@@ -48,6 +48,8 @@
 
 	for (i = 0; i < RADEON_NUM_RINGS; ++i)
 		sync->sync_to[i] = NULL;
+
+	sync->last_vm_update = NULL;
 }
 
 /**
@@ -68,6 +70,11 @@
 
 	other = sync->sync_to[fence->ring];
 	sync->sync_to[fence->ring] = radeon_fence_later(fence, other);
+
+	if (fence->is_vm_update) {
+		other = sync->last_vm_update;
+		sync->last_vm_update = radeon_fence_later(fence, other);
+	}
 }
 
 /**
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index 4ca2779..6ff5741 100644
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -190,7 +190,7 @@
 		return NULL;
 
 	/* we definately need to flush */
-	radeon_fence_unref(&vm->last_flush);
+	vm->pd_gpu_addr = ~0ll;
 
 	/* skip over VMID 0, since it is the system VM */
 	for (i = 1; i < rdev->vm_manager.nvm; ++i) {
@@ -228,6 +228,7 @@
  * @rdev: radeon_device pointer
  * @vm: vm we want to flush
  * @ring: ring to use for flush
+ * @updates: last vm update that is waited for
  *
  * Flush the vm (cayman+).
  *
@@ -235,13 +236,16 @@
  */
 void radeon_vm_flush(struct radeon_device *rdev,
 		     struct radeon_vm *vm,
-		     int ring)
+		     int ring, struct radeon_fence *updates)
 {
 	uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory);
 
-	/* if we can't remember our last VM flush then flush now! */
-	if (!vm->last_flush || pd_addr != vm->pd_gpu_addr) {
+	if (pd_addr != vm->pd_gpu_addr || !vm->flushed_updates ||
+	    radeon_fence_is_earlier(vm->flushed_updates, updates)) {
+
 		trace_radeon_vm_flush(pd_addr, ring, vm->id);
+		radeon_fence_unref(&vm->flushed_updates);
+		vm->flushed_updates = radeon_fence_ref(updates);
 		vm->pd_gpu_addr = pd_addr;
 		radeon_ring_vm_flush(rdev, &rdev->ring[ring],
 				     vm->id, vm->pd_gpu_addr);
@@ -272,10 +276,6 @@
 
 	radeon_fence_unref(&vm->last_id_use);
 	vm->last_id_use = radeon_fence_ref(fence);
-
-        /* we just flushed the VM, remember that */
-        if (!vm->last_flush)
-                vm->last_flush = radeon_fence_ref(fence);
 }
 
 /**
@@ -418,6 +418,7 @@
 	if (r)
 		goto error_free;
 
+	ib.fence->is_vm_update = true;
 	radeon_bo_fence(bo, ib.fence, false);
 
 error_free:
@@ -697,10 +698,10 @@
 			radeon_ib_free(rdev, &ib);
 			return r;
 		}
+		ib.fence->is_vm_update = true;
 		radeon_bo_fence(pd, ib.fence, false);
 		radeon_fence_unref(&vm->fence);
 		vm->fence = radeon_fence_ref(ib.fence);
-		radeon_fence_unref(&vm->last_flush);
 	}
 	radeon_ib_free(rdev, &ib);
 
@@ -989,11 +990,11 @@
 		radeon_ib_free(rdev, &ib);
 		return r;
 	}
+	ib.fence->is_vm_update = true;
 	radeon_vm_fence_pts(vm, bo_va->it.start, bo_va->it.last + 1, ib.fence);
 	radeon_fence_unref(&vm->fence);
 	vm->fence = radeon_fence_ref(ib.fence);
 	radeon_ib_free(rdev, &ib);
-	radeon_fence_unref(&vm->last_flush);
 
 	return 0;
 }
@@ -1124,7 +1125,7 @@
 	vm->id = 0;
 	vm->ib_bo_va = NULL;
 	vm->fence = NULL;
-	vm->last_flush = NULL;
+	vm->flushed_updates = NULL;
 	vm->last_id_use = NULL;
 	mutex_init(&vm->mutex);
 	vm->va = RB_ROOT;
@@ -1196,7 +1197,7 @@
 	radeon_bo_unref(&vm->page_directory);
 
 	radeon_fence_unref(&vm->fence);
-	radeon_fence_unref(&vm->last_flush);
+	radeon_fence_unref(&vm->flushed_updates);
 	radeon_fence_unref(&vm->last_id_use);
 
 	mutex_destroy(&vm->mutex);