drm/amdgpu: implement the allocation range (v3)

Pass a ttm_placement pointer to amdgpu_bo_create_restricted
add min_offset to amdgpu_bo_pin_restricted.  This makes it
easier to allocate memory with address restrictions.  With
this patch we can also enable 2-ended allocation again.

v2: fix rebase conflicts
v3: memset placements before using

Reviewed-by: Jammy Zhou <Jammy.Zhou@amd.com>
Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 80f0bea..8eb5c55 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -43,6 +43,7 @@
 #include <ttm/ttm_execbuf_util.h>
 
 #include <drm/drm_gem.h>
+#include <drm/amdgpu_drm.h>
 
 #include "amd_shared.h"
 #include "amdgpu_family.h"
@@ -542,12 +543,14 @@
 	struct amdgpu_bo		*bo;
 };
 
+#define AMDGPU_GEM_DOMAIN_MAX		0x3
+
 struct amdgpu_bo {
 	/* Protected by gem.mutex */
 	struct list_head		list;
 	/* Protected by tbo.reserved */
 	u32				initial_domain;
-	struct ttm_place		placements[4];
+	struct ttm_place		placements[AMDGPU_GEM_DOMAIN_MAX + 1];
 	struct ttm_placement		placement;
 	struct ttm_buffer_object	tbo;
 	struct ttm_bo_kmap_obj		kmap;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index f22c067..b16b925 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -159,7 +159,7 @@
 		goto cleanup;
 	}
 
-	r = amdgpu_bo_pin_restricted(new_rbo, AMDGPU_GEM_DOMAIN_VRAM, 0, &base);
+	r = amdgpu_bo_pin_restricted(new_rbo, AMDGPU_GEM_DOMAIN_VRAM, 0, 0, &base);
 	if (unlikely(r != 0)) {
 		amdgpu_bo_unreserve(new_rbo);
 		r = -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index 73b7aad..c1645d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -150,7 +150,7 @@
 	}
 
 
-	ret = amdgpu_bo_pin_restricted(rbo, AMDGPU_GEM_DOMAIN_VRAM, 0, NULL);
+	ret = amdgpu_bo_pin_restricted(rbo, AMDGPU_GEM_DOMAIN_VRAM, 0, 0, NULL);
 	if (ret) {
 		amdgpu_bo_unreserve(rbo);
 		goto out_unref;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index dcc6af9..62cabfb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -41,13 +41,13 @@
 void amdgpu_ttm_fini(struct amdgpu_device *adev);
 
 static u64 amdgpu_get_vis_part_size(struct amdgpu_device *adev,
-						struct ttm_mem_reg * mem)
+						struct ttm_mem_reg *mem)
 {
 	u64 ret = 0;
 	if (mem->start << PAGE_SHIFT < adev->mc.visible_vram_size) {
 		ret = (u64)((mem->start << PAGE_SHIFT) + mem->size) >
 			   adev->mc.visible_vram_size ?
-			   adev->mc.visible_vram_size - (mem->start << PAGE_SHIFT):
+			   adev->mc.visible_vram_size - (mem->start << PAGE_SHIFT) :
 			   mem->size;
 	}
 	return ret;
@@ -112,82 +112,111 @@
 	return false;
 }
 
-void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *rbo, u32 domain)
+static void amdgpu_ttm_placement_init(struct amdgpu_device *adev,
+				      struct ttm_placement *placement,
+				      struct ttm_place *placements,
+				      u32 domain, u64 flags)
 {
 	u32 c = 0, i;
-	rbo->placement.placement = rbo->placements;
-	rbo->placement.busy_placement = rbo->placements;
+
+	placement->placement = placements;
+	placement->busy_placement = placements;
 
 	if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
-		if (rbo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS &&
-			rbo->adev->mc.visible_vram_size < rbo->adev->mc.real_vram_size) {
-			rbo->placements[c].fpfn =
-				rbo->adev->mc.visible_vram_size >> PAGE_SHIFT;
-			rbo->placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
-										 TTM_PL_FLAG_VRAM;
+		if (flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS &&
+			adev->mc.visible_vram_size < adev->mc.real_vram_size) {
+			placements[c].fpfn =
+				adev->mc.visible_vram_size >> PAGE_SHIFT;
+			placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
+				TTM_PL_FLAG_VRAM;
 		}
-		rbo->placements[c].fpfn = 0;
-		rbo->placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
-									 TTM_PL_FLAG_VRAM;
+		placements[c].fpfn = 0;
+		placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
+			TTM_PL_FLAG_VRAM;
 	}
 
 	if (domain & AMDGPU_GEM_DOMAIN_GTT) {
-		if (rbo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) {
-			rbo->placements[c].fpfn = 0;
-			rbo->placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_TT |
-										 TTM_PL_FLAG_UNCACHED;
+		if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) {
+			placements[c].fpfn = 0;
+			placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_TT |
+				TTM_PL_FLAG_UNCACHED;
 		} else {
-			rbo->placements[c].fpfn = 0;
-			rbo->placements[c++].flags = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_TT;
+			placements[c].fpfn = 0;
+			placements[c++].flags = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_TT;
 		}
 	}
 
 	if (domain & AMDGPU_GEM_DOMAIN_CPU) {
-		if (rbo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) {
-			rbo->placements[c].fpfn = 0;
-			rbo->placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_SYSTEM |
-										 TTM_PL_FLAG_UNCACHED;
+		if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) {
+			placements[c].fpfn = 0;
+			placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_SYSTEM |
+				TTM_PL_FLAG_UNCACHED;
 		} else {
-			rbo->placements[c].fpfn = 0;
-			rbo->placements[c++].flags =  TTM_PL_FLAG_CACHED | TTM_PL_FLAG_SYSTEM;
+			placements[c].fpfn = 0;
+			placements[c++].flags = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_SYSTEM;
 		}
 	}
 
 	if (domain & AMDGPU_GEM_DOMAIN_GDS) {
-		rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED |
-					AMDGPU_PL_FLAG_GDS;
+		placements[c].fpfn = 0;
+		placements[c++].flags = TTM_PL_FLAG_UNCACHED |
+			AMDGPU_PL_FLAG_GDS;
 	}
 	if (domain & AMDGPU_GEM_DOMAIN_GWS) {
-		rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED |
-					AMDGPU_PL_FLAG_GWS;
+		placements[c].fpfn = 0;
+		placements[c++].flags = TTM_PL_FLAG_UNCACHED |
+			AMDGPU_PL_FLAG_GWS;
 	}
 	if (domain & AMDGPU_GEM_DOMAIN_OA) {
-		rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED |
-					AMDGPU_PL_FLAG_OA;
+		placements[c].fpfn = 0;
+		placements[c++].flags = TTM_PL_FLAG_UNCACHED |
+			AMDGPU_PL_FLAG_OA;
 	}
 
 	if (!c) {
-		rbo->placements[c].fpfn = 0;
-		rbo->placements[c++].flags = TTM_PL_MASK_CACHING |
-					TTM_PL_FLAG_SYSTEM;
+		placements[c].fpfn = 0;
+		placements[c++].flags = TTM_PL_MASK_CACHING |
+			TTM_PL_FLAG_SYSTEM;
 	}
-	rbo->placement.num_placement = c;
-	rbo->placement.num_busy_placement = c;
+	placement->num_placement = c;
+	placement->num_busy_placement = c;
 
 	for (i = 0; i < c; i++) {
-		if ((rbo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
-			(rbo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
-			!rbo->placements[i].fpfn)
-			rbo->placements[i].lpfn =
-				rbo->adev->mc.visible_vram_size >> PAGE_SHIFT;
+		if ((flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
+			(placements[i].flags & TTM_PL_FLAG_VRAM) &&
+			!placements[i].fpfn)
+			placements[i].lpfn =
+				adev->mc.visible_vram_size >> PAGE_SHIFT;
 		else
-			rbo->placements[i].lpfn = 0;
+			placements[i].lpfn = 0;
 	}
 }
 
-int amdgpu_bo_create(struct amdgpu_device *adev,
-		     unsigned long size, int byte_align, bool kernel, u32 domain, u64 flags,
-		     struct sg_table *sg, struct amdgpu_bo **bo_ptr)
+void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *rbo, u32 domain)
+{
+	amdgpu_ttm_placement_init(rbo->adev, &rbo->placement,
+				  rbo->placements, domain, rbo->flags);
+}
+
+static void amdgpu_fill_placement_to_bo(struct amdgpu_bo *bo,
+					struct ttm_placement *placement)
+{
+	BUG_ON(placement->num_placement > (AMDGPU_GEM_DOMAIN_MAX + 1));
+
+	memcpy(bo->placements, placement->placement,
+	       placement->num_placement * sizeof(struct ttm_place));
+	bo->placement.num_placement = placement->num_placement;
+	bo->placement.num_busy_placement = placement->num_busy_placement;
+	bo->placement.placement = bo->placements;
+	bo->placement.busy_placement = bo->placements;
+}
+
+int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
+				unsigned long size, int byte_align,
+				bool kernel, u32 domain, u64 flags,
+				struct sg_table *sg,
+				struct ttm_placement *placement,
+				struct amdgpu_bo **bo_ptr)
 {
 	struct amdgpu_bo *bo;
 	enum ttm_bo_type type;
@@ -241,7 +270,7 @@
 				       AMDGPU_GEM_DOMAIN_OA);
 
 	bo->flags = flags;
-	amdgpu_ttm_placement_from_domain(bo, domain);
+	amdgpu_fill_placement_to_bo(bo, placement);
 	/* Kernel allocation are uninterruptible */
 	down_read(&adev->pm.mclk_lock);
 	r = ttm_bo_init(&adev->mman.bdev, &bo->tbo, size, type,
@@ -258,6 +287,27 @@
 	return 0;
 }
 
+int amdgpu_bo_create(struct amdgpu_device *adev,
+		     unsigned long size, int byte_align,
+		     bool kernel, u32 domain, u64 flags,
+		     struct sg_table *sg, struct amdgpu_bo **bo_ptr)
+{
+	struct ttm_placement placement = {0};
+	struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1];
+
+	memset(&placements, 0,
+	       (AMDGPU_GEM_DOMAIN_MAX + 1) * sizeof(struct ttm_place));
+
+	amdgpu_ttm_placement_init(adev, &placement,
+				  placements, domain, flags);
+
+	return amdgpu_bo_create_restricted(adev, size, byte_align,
+					   kernel, domain, flags,
+					   sg,
+					   &placement,
+					   bo_ptr);
+}
+
 int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
 {
 	bool is_iomem;
@@ -313,14 +363,19 @@
 		*bo = NULL;
 }
 
-int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, u64 max_offset,
+int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
+			     u64 min_offset, u64 max_offset,
 			     u64 *gpu_addr)
 {
 	int r, i;
+	unsigned fpfn, lpfn;
 
 	if (amdgpu_ttm_tt_has_userptr(bo->tbo.ttm))
 		return -EPERM;
 
+	if (WARN_ON_ONCE(min_offset > max_offset))
+		return -EINVAL;
+
 	if (bo->pin_count) {
 		bo->pin_count++;
 		if (gpu_addr)
@@ -328,7 +383,6 @@
 
 		if (max_offset != 0) {
 			u64 domain_start;
-
 			if (domain == AMDGPU_GEM_DOMAIN_VRAM)
 				domain_start = bo->adev->mc.vram_start;
 			else
@@ -343,13 +397,21 @@
 	for (i = 0; i < bo->placement.num_placement; i++) {
 		/* force to pin into visible video ram */
 		if ((bo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
-			!(bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) &&
-			(!max_offset || max_offset > bo->adev->mc.visible_vram_size))
-			bo->placements[i].lpfn =
-				bo->adev->mc.visible_vram_size >> PAGE_SHIFT;
-		else
-			bo->placements[i].lpfn = max_offset >> PAGE_SHIFT;
-
+		    !(bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) &&
+		    (!max_offset || max_offset > bo->adev->mc.visible_vram_size)) {
+			if (WARN_ON_ONCE(min_offset >
+					 bo->adev->mc.visible_vram_size))
+				return -EINVAL;
+			fpfn = min_offset >> PAGE_SHIFT;
+			lpfn = bo->adev->mc.visible_vram_size >> PAGE_SHIFT;
+		} else {
+			fpfn = min_offset >> PAGE_SHIFT;
+			lpfn = max_offset >> PAGE_SHIFT;
+		}
+		if (fpfn > bo->placements[i].fpfn)
+			bo->placements[i].fpfn = fpfn;
+		if (lpfn && lpfn < bo->placements[i].lpfn)
+			bo->placements[i].lpfn = lpfn;
 		bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
 	}
 
@@ -370,7 +432,7 @@
 
 int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain, u64 *gpu_addr)
 {
-	return amdgpu_bo_pin_restricted(bo, domain, 0, gpu_addr);
+	return amdgpu_bo_pin_restricted(bo, domain, 0, 0, gpu_addr);
 }
 
 int amdgpu_bo_unpin(struct amdgpu_bo *bo)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index b1e0a03c..675bdc3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -130,13 +130,20 @@
 			    bool kernel, u32 domain, u64 flags,
 			    struct sg_table *sg,
 			    struct amdgpu_bo **bo_ptr);
+int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
+				unsigned long size, int byte_align,
+				bool kernel, u32 domain, u64 flags,
+				struct sg_table *sg,
+				struct ttm_placement *placement,
+				struct amdgpu_bo **bo_ptr);
 int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr);
 void amdgpu_bo_kunmap(struct amdgpu_bo *bo);
 struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo);
 void amdgpu_bo_unref(struct amdgpu_bo **bo);
 int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain, u64 *gpu_addr);
 int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
-				    u64 max_offset, u64 *gpu_addr);
+			     u64 min_offset, u64 max_offset,
+			     u64 *gpu_addr);
 int amdgpu_bo_unpin(struct amdgpu_bo *bo);
 int amdgpu_bo_evict_vram(struct amdgpu_device *adev);
 void amdgpu_bo_force_delete(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index da9a4b9..926c8e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -2553,7 +2553,7 @@
 	if (unlikely(ret != 0))
 		goto fail;
 	ret = amdgpu_bo_pin_restricted(robj, AMDGPU_GEM_DOMAIN_VRAM,
-				       0, &gpu_addr);
+				       0, 0, &gpu_addr);
 	amdgpu_bo_unreserve(robj);
 	if (ret)
 		goto fail;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index edd9d17..bc60fd1 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -2552,7 +2552,7 @@
 	if (unlikely(ret != 0))
 		goto fail;
 	ret = amdgpu_bo_pin_restricted(robj, AMDGPU_GEM_DOMAIN_VRAM,
-				       0, &gpu_addr);
+				       0, 0, &gpu_addr);
 	amdgpu_bo_unreserve(robj);
 	if (ret)
 		goto fail;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index 1d291f1d..9e8b9f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -2496,7 +2496,7 @@
 	if (unlikely(ret != 0))
 		goto fail;
 	ret = amdgpu_bo_pin_restricted(robj, AMDGPU_GEM_DOMAIN_VRAM,
-				       0, &gpu_addr);
+				       0, 0, &gpu_addr);
 	amdgpu_bo_unreserve(robj);
 	if (ret)
 		goto fail;