drm/nouveau/ttm: untangle code to support accelerated buffer moves

The accelerated move path previously picked an implementation on every
buffer move with a chain of dev_priv->card_type checks inside
nouveau_bo_move_m2mf().  Replace that with a per-device function pointer
(dev_priv->ttm.move) which is filled in once by the new
nouveau_bo_move_init(): it walks a table of copy classes (0xa0b5 COPY
and the 0x9039/0x5039/0x0039 M2MF variants), instantiates the first one
the channel will accept, and runs the matching per-generation init
routine to bind the object and, where needed, its notifier and DMA
objects.  If no method can be initialised the pointer stays NULL and
buffer moves fall back to ttm_bo_move_memcpy().

Since not every class in the table is an M2MF object, the subchannel
used for these copies is now referred to as NvSubCopy rather than
NvSubM2MF throughout.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
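
Note for reviewers: the move path in nouveau_bo.c below dereferences
dev_priv->ttm.move, so the patch relies on a matching function-pointer
member being added to the ttm bookkeeping struct in nouveau_drv.h; that
hunk is not quoted here.  A sketch of what the declaration is assumed to
look like (surrounding members elided, their layout illustrative only):

	struct {
		/* ... existing TTM state (bo device, global refs, ...) ... */

		/* accelerated buffer copy selected by nouveau_bo_move_init();
		 * left NULL when no method could be set up, in which case
		 * nouveau_bo_move() falls back to ttm_bo_move_memcpy()
		 */
		int (*move)(struct nouveau_channel *,
			    struct ttm_buffer_object *,
			    struct ttm_mem_reg *, struct ttm_mem_reg *);
	} ttm;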
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 6d66314..6e78b1a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -36,6 +36,7 @@
 #include "nouveau_mm.h"
 #include "nouveau_vm.h"
 #include "nouveau_fence.h"
+#include "nouveau_ramht.h"
 
 #include <linux/log2.h>
 #include <linux/slab.h>
@@ -511,6 +512,17 @@
 }
 
 static int
+nvc0_bo_move_init(struct nouveau_channel *chan, u32 handle)
+{
+	int ret = RING_SPACE(chan, 2);
+	if (ret == 0) {
+		BEGIN_NVC0(chan, NvSubCopy, 0x0000, 1);
+		OUT_RING  (chan, handle);
+	}
+	return ret;
+}
+
+static int
 nvc0_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
 		  struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
 {
@@ -528,17 +540,17 @@
 		if (ret)
 			return ret;
 
-		BEGIN_NVC0(chan, NvSubM2MF, 0x0238, 2);
+		BEGIN_NVC0(chan, NvSubCopy, 0x0238, 2);
 		OUT_RING  (chan, upper_32_bits(dst_offset));
 		OUT_RING  (chan, lower_32_bits(dst_offset));
-		BEGIN_NVC0(chan, NvSubM2MF, 0x030c, 6);
+		BEGIN_NVC0(chan, NvSubCopy, 0x030c, 6);
 		OUT_RING  (chan, upper_32_bits(src_offset));
 		OUT_RING  (chan, lower_32_bits(src_offset));
 		OUT_RING  (chan, PAGE_SIZE); /* src_pitch */
 		OUT_RING  (chan, PAGE_SIZE); /* dst_pitch */
 		OUT_RING  (chan, PAGE_SIZE); /* line_length */
 		OUT_RING  (chan, line_count);
-		BEGIN_NVC0(chan, NvSubM2MF, 0x0300, 1);
+		BEGIN_NVC0(chan, NvSubCopy, 0x0300, 1);
 		OUT_RING  (chan, 0x00100110);
 
 		page_count -= line_count;
@@ -550,6 +562,28 @@
 }
 
 static int
+nv50_bo_move_init(struct nouveau_channel *chan, u32 handle)
+{
+	int ret = nouveau_notifier_alloc(chan, NvNotify0, 32, 0xfe0, 0x1000,
+					 &chan->m2mf_ntfy);
+	if (ret == 0) {
+		ret = RING_SPACE(chan, 6);
+		if (ret == 0) {
+			BEGIN_NV04(chan, NvSubCopy, 0x0000, 1);
+			OUT_RING  (chan, handle);
+			BEGIN_NV04(chan, NvSubCopy, 0x0180, 3);
+			OUT_RING  (chan, NvNotify0);
+			OUT_RING  (chan, NvDmaFB);
+			OUT_RING  (chan, NvDmaFB);
+		} else {
+			nouveau_ramht_remove(chan, NvNotify0);
+		}
+	}
+
+	return ret;
+}
+
+static int
 nv50_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
 		  struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
 {
@@ -573,7 +607,7 @@
 			if (ret)
 				return ret;
 
-			BEGIN_NV04(chan, NvSubM2MF, 0x0200, 7);
+			BEGIN_NV04(chan, NvSubCopy, 0x0200, 7);
 			OUT_RING  (chan, 0);
 			OUT_RING  (chan, 0);
 			OUT_RING  (chan, stride);
@@ -586,7 +620,7 @@
 			if (ret)
 				return ret;
 
-			BEGIN_NV04(chan, NvSubM2MF, 0x0200, 1);
+			BEGIN_NV04(chan, NvSubCopy, 0x0200, 1);
 			OUT_RING  (chan, 1);
 		}
 		if (old_mem->mem_type == TTM_PL_VRAM &&
@@ -595,7 +629,7 @@
 			if (ret)
 				return ret;
 
-			BEGIN_NV04(chan, NvSubM2MF, 0x021c, 7);
+			BEGIN_NV04(chan, NvSubCopy, 0x021c, 7);
 			OUT_RING  (chan, 0);
 			OUT_RING  (chan, 0);
 			OUT_RING  (chan, stride);
@@ -608,7 +642,7 @@
 			if (ret)
 				return ret;
 
-			BEGIN_NV04(chan, NvSubM2MF, 0x021c, 1);
+			BEGIN_NV04(chan, NvSubCopy, 0x021c, 1);
 			OUT_RING  (chan, 1);
 		}
 
@@ -616,10 +650,10 @@
 		if (ret)
 			return ret;
 
-		BEGIN_NV04(chan, NvSubM2MF, 0x0238, 2);
+		BEGIN_NV04(chan, NvSubCopy, 0x0238, 2);
 		OUT_RING  (chan, upper_32_bits(src_offset));
 		OUT_RING  (chan, upper_32_bits(dst_offset));
-		BEGIN_NV04(chan, NvSubM2MF, 0x030c, 8);
+		BEGIN_NV04(chan, NvSubCopy, 0x030c, 8);
 		OUT_RING  (chan, lower_32_bits(src_offset));
 		OUT_RING  (chan, lower_32_bits(dst_offset));
 		OUT_RING  (chan, stride);
@@ -628,7 +662,7 @@
 		OUT_RING  (chan, height);
 		OUT_RING  (chan, 0x00000101);
 		OUT_RING  (chan, 0x00000000);
-		BEGIN_NV04(chan, NvSubM2MF, NV_MEMORY_TO_MEMORY_FORMAT_NOP, 1);
+		BEGIN_NV04(chan, NvSubCopy, NV_MEMORY_TO_MEMORY_FORMAT_NOP, 1);
 		OUT_RING  (chan, 0);
 
 		length -= amount;
@@ -639,6 +673,24 @@
 	return 0;
 }
 
+static int
+nv04_bo_move_init(struct nouveau_channel *chan, u32 handle)
+{
+	int ret = nouveau_notifier_alloc(chan, NvNotify0, 32, 0xfe0, 0x1000,
+					 &chan->m2mf_ntfy);
+	if (ret == 0) {
+		ret = RING_SPACE(chan, 4);
+		if (ret == 0) {
+			BEGIN_NV04(chan, NvSubCopy, 0x0000, 1);
+			OUT_RING  (chan, handle);
+			BEGIN_NV04(chan, NvSubCopy, 0x0180, 1);
+			OUT_RING  (chan, NvNotify0);
+		}
+	}
+
+	return ret;
+}
+
 static inline uint32_t
 nouveau_bo_mem_ctxdma(struct ttm_buffer_object *bo,
 		      struct nouveau_channel *chan, struct ttm_mem_reg *mem)
@@ -661,7 +713,7 @@
 	if (ret)
 		return ret;
 
-	BEGIN_NV04(chan, NvSubM2MF, NV_MEMORY_TO_MEMORY_FORMAT_DMA_SOURCE, 2);
+	BEGIN_NV04(chan, NvSubCopy, NV_MEMORY_TO_MEMORY_FORMAT_DMA_SOURCE, 2);
 	OUT_RING  (chan, nouveau_bo_mem_ctxdma(bo, chan, old_mem));
 	OUT_RING  (chan, nouveau_bo_mem_ctxdma(bo, chan, new_mem));
 
@@ -673,7 +725,7 @@
 		if (ret)
 			return ret;
 
-		BEGIN_NV04(chan, NvSubM2MF,
+		BEGIN_NV04(chan, NvSubCopy,
 				 NV_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8);
 		OUT_RING  (chan, src_offset);
 		OUT_RING  (chan, dst_offset);
@@ -683,7 +735,7 @@
 		OUT_RING  (chan, line_count);
 		OUT_RING  (chan, 0x00000101);
 		OUT_RING  (chan, 0x00000000);
-		BEGIN_NV04(chan, NvSubM2MF, NV_MEMORY_TO_MEMORY_FORMAT_NOP, 1);
+		BEGIN_NV04(chan, NvSubCopy, NV_MEMORY_TO_MEMORY_FORMAT_NOP, 1);
 		OUT_RING  (chan, 0);
 
 		page_count -= line_count;
@@ -743,16 +795,7 @@
 			goto out;
 	}
 
-	if (dev_priv->card_type < NV_50)
-		ret = nv04_bo_move_m2mf(chan, bo, &bo->mem, new_mem);
-	else
-	if (dev_priv->card_type < NV_C0)
-		ret = nv50_bo_move_m2mf(chan, bo, &bo->mem, new_mem);
-	else
-	if (dev_priv->card_type < NV_E0)
-		ret = nvc0_bo_move_m2mf(chan, bo, &bo->mem, new_mem);
-	else
-		ret = nve0_bo_move_copy(chan, bo, &bo->mem, new_mem);
+	ret = dev_priv->ttm.move(chan, bo, &bo->mem, new_mem);
 	if (ret == 0) {
 		ret = nouveau_bo_move_accel_cleanup(chan, nvbo, evict,
 						    no_wait_reserve,
@@ -764,6 +807,42 @@
 	return ret;
 }
 
+void
+nouveau_bo_move_init(struct nouveau_channel *chan)
+{
+	struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
+	static const struct {
+		const char *name;
+		u32 oclass;
+		int (*exec)(struct nouveau_channel *,
+			    struct ttm_buffer_object *,
+			    struct ttm_mem_reg *, struct ttm_mem_reg *);
+		int (*init)(struct nouveau_channel *, u32 handle);
+	} _methods[] = {
+		{  "COPY", 0xa0b5, nve0_bo_move_copy, nvc0_bo_move_init },
+		{  "M2MF", 0x9039, nvc0_bo_move_m2mf, nvc0_bo_move_init },
+		{  "M2MF", 0x5039, nv50_bo_move_m2mf, nv50_bo_move_init },
+		{  "M2MF", 0x0039, nv04_bo_move_m2mf, nv04_bo_move_init },
+		{}
+	}, *mthd = _methods;
+	const char *name = "CPU";
+	int ret;
+
+	do {
+		ret = nouveau_gpuobj_gr_new(chan, mthd->oclass, mthd->oclass);
+		if (ret == 0) {
+			ret = mthd->init(chan, mthd->oclass);
+			if (ret == 0) {
+				dev_priv->ttm.move = mthd->exec;
+				name = mthd->name;
+				break;
+			}
+		}
+	} while ((++mthd)->exec);
+
+	NV_INFO(chan->dev, "MM: using %s for buffer copies\n", name);
+}
+
 static int
 nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr,
 		      bool no_wait_reserve, bool no_wait_gpu,
@@ -920,8 +999,8 @@
 		goto out;
 	}
 
-	/* Software copy if the card isn't up and running yet. */
-	if (!dev_priv->channel) {
+	/* CPU copy if we have no accelerated method available */
+	if (!dev_priv->ttm.move) {
 		ret = ttm_bo_move_memcpy(bo, evict, no_wait_reserve, no_wait_gpu, new_mem);
 		goto out;
 	}
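
Usage note: nouveau_bo_move_init() is intended to be called once per
device, after the DRM's own channel has been brought up, so that the
copy method is probed before the first accelerated move.  A minimal
sketch of such a call site (the surrounding bring-up code is assumed,
not part of this patch):

	/* during card/channel initialisation, once the kernel channel exists */
	if (dev_priv->channel)
		nouveau_bo_move_init(dev_priv->channel);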