drm/amdgpu: adjust HDP write queue flushing for tlb invalidation

Separate tlb invalidation and hdp flushing and move the HDP
flush to the caller.

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 0a4f34a..d0617f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -247,6 +247,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
 		}
 	}
 	mb();
+	amdgpu_asic_flush_hdp(adev);
 	amdgpu_gart_flush_gpu_tlb(adev, 0);
 	return 0;
 }
@@ -329,6 +330,7 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
 		return r;
 
 	mb();
+	amdgpu_asic_flush_hdp(adev);
 	amdgpu_gart_flush_gpu_tlb(adev, 0);
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 5afbc5e..df0f997 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -856,6 +856,7 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 	if (vm->use_cpu_for_update) {
 		/* Flush HDP */
 		mb();
+		amdgpu_asic_flush_hdp(adev);
 		amdgpu_gart_flush_gpu_tlb(adev, 0);
 	} else if (params.ib->length_dw == 0) {
 		amdgpu_job_free(job);
@@ -1457,6 +1458,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	if (vm->use_cpu_for_update) {
 		/* Flush HDP */
 		mb();
+		amdgpu_asic_flush_hdp(adev);
 		amdgpu_gart_flush_gpu_tlb(adev, 0);
 	}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index 5f5eb15..aa06e72 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -360,8 +360,6 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev)
 static void gmc_v6_0_gart_flush_gpu_tlb(struct amdgpu_device *adev,
 					uint32_t vmid)
 {
-	WREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0);
-
 	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 12e49bd..550abff 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -432,9 +432,6 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
 static void gmc_v7_0_gart_flush_gpu_tlb(struct amdgpu_device *adev,
 					uint32_t vmid)
 {
-	/* flush hdp cache */
-	WREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0);
-
 	/* bits 0-15 are the VM contexts0-15 */
 	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 9a170e3..c0ddd0f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -607,9 +607,6 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
 static void gmc_v8_0_gart_flush_gpu_tlb(struct amdgpu_device *adev,
 					uint32_t vmid)
 {
-	/* flush hdp cache */
-	WREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0);
-
 	/* bits 0-15 are the VM contexts0-15 */
 	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 100ec69..2a565a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -330,9 +330,6 @@ static void gmc_v9_0_gart_flush_gpu_tlb(struct amdgpu_device *adev,
 	const unsigned eng = 17;
 	unsigned i, j;
 
-	/* flush hdp cache */
-	adev->nbio_funcs->hdp_flush(adev);
-
 	spin_lock(&adev->mc.invalidate_lock);
 
 	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {