drm/nouveau: rework vram init/fini ordering a little

Commit "drm/nouveau: add some debug output if nouveau_mm busy at destroy time"
revealed an issue where vram mm takedown would actually fail due to there
still being nodes present, causing nouveau to leak a small amount of memory
on module unload.

This splits TTM/nouveau_mm a bit more cleanly and ensures nouveau_mm fini
isn't done until all gpuobjs are also destroyed.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index bbea045..d610edb 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -504,7 +504,10 @@
 };
 
 struct nouveau_vram_engine {
+	struct nouveau_mm *mm;
+
 	int  (*init)(struct drm_device *);
+	void (*takedown)(struct drm_device *dev);
 	int  (*get)(struct drm_device *, u64, u32 align, u32 size_nc,
 		    u32 type, struct nouveau_mem **);
 	void (*put)(struct drm_device *, struct nouveau_mem **);
@@ -717,7 +720,6 @@
 	/* VRAM/fb configuration */
 	uint64_t vram_size;
 	uint64_t vram_sys_base;
-	u32 vram_rblock_size;
 
 	uint64_t fb_phys;
 	uint64_t fb_available_size;
diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c
index 976887d..765f0e5 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_mem.c
@@ -451,10 +451,6 @@
 		dev_priv->ramin_rsvd_vram = 512 * 1024;
 	}
 
-	ret = dev_priv->engine.vram.init(dev);
-	if (ret)
-		return ret;
-
 	NV_INFO(dev, "Detected %dMiB VRAM\n", (int)(dev_priv->vram_size >> 20));
 	if (dev_priv->vram_sys_base) {
 		NV_INFO(dev, "Stolen system memory at: 0x%010llx\n",
@@ -729,36 +725,16 @@
 }
 
 static int
-nouveau_vram_manager_init(struct ttm_mem_type_manager *man, unsigned long p_size)
+nouveau_vram_manager_init(struct ttm_mem_type_manager *man, unsigned long psize)
 {
-	struct drm_nouveau_private *dev_priv = nouveau_bdev(man->bdev);
-	struct nouveau_mm *mm;
-	u64 size, block, rsvd;
-	int ret;
-
-	rsvd  = (256 * 1024); /* vga memory */
-	size  = (p_size << PAGE_SHIFT) - rsvd;
-	block = dev_priv->vram_rblock_size;
-
-	ret = nouveau_mm_init(&mm, rsvd >> 12, size >> 12, block >> 12);
-	if (ret)
-		return ret;
-
-	man->priv = mm;
+	/* nothing to do */
 	return 0;
 }
 
 static int
 nouveau_vram_manager_fini(struct ttm_mem_type_manager *man)
 {
-	struct nouveau_mm *mm = man->priv;
-	int ret;
-
-	ret = nouveau_mm_fini(&mm);
-	if (ret)
-		return ret;
-
-	man->priv = NULL;
+	/* nothing to do */
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_mm.h b/drivers/gpu/drm/nouveau/nouveau_mm.h
index 1f7483a..b9c016d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mm.h
+++ b/drivers/gpu/drm/nouveau/nouveau_mm.h
@@ -52,6 +52,7 @@
 void nouveau_mm_put(struct nouveau_mm *, struct nouveau_mm_node *);
 
 int  nv50_vram_init(struct drm_device *);
+void nv50_vram_fini(struct drm_device *);
 int  nv50_vram_new(struct drm_device *, u64 size, u32 align, u32 size_nc,
 		    u32 memtype, struct nouveau_mem **);
 void nv50_vram_del(struct drm_device *, struct nouveau_mem **);
diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c
index 27d2a816..49196fa 100644
--- a/drivers/gpu/drm/nouveau/nouveau_state.c
+++ b/drivers/gpu/drm/nouveau/nouveau_state.c
@@ -91,6 +91,7 @@
 		engine->pm.clock_pre		= nv04_pm_clock_pre;
 		engine->pm.clock_set		= nv04_pm_clock_set;
 		engine->vram.init		= nouveau_mem_detect;
+		engine->vram.takedown		= nouveau_stub_takedown;
 		engine->vram.flags_valid	= nouveau_mem_flags_valid;
 		break;
 	case 0x10:
@@ -139,6 +140,7 @@
 		engine->pm.clock_pre		= nv04_pm_clock_pre;
 		engine->pm.clock_set		= nv04_pm_clock_set;
 		engine->vram.init		= nouveau_mem_detect;
+		engine->vram.takedown		= nouveau_stub_takedown;
 		engine->vram.flags_valid	= nouveau_mem_flags_valid;
 		break;
 	case 0x20:
@@ -187,6 +189,7 @@
 		engine->pm.clock_pre		= nv04_pm_clock_pre;
 		engine->pm.clock_set		= nv04_pm_clock_set;
 		engine->vram.init		= nouveau_mem_detect;
+		engine->vram.takedown		= nouveau_stub_takedown;
 		engine->vram.flags_valid	= nouveau_mem_flags_valid;
 		break;
 	case 0x30:
@@ -237,6 +240,7 @@
 		engine->pm.voltage_get		= nouveau_voltage_gpio_get;
 		engine->pm.voltage_set		= nouveau_voltage_gpio_set;
 		engine->vram.init		= nouveau_mem_detect;
+		engine->vram.takedown		= nouveau_stub_takedown;
 		engine->vram.flags_valid	= nouveau_mem_flags_valid;
 		break;
 	case 0x40:
@@ -289,6 +293,7 @@
 		engine->pm.voltage_set		= nouveau_voltage_gpio_set;
 		engine->pm.temp_get		= nv40_temp_get;
 		engine->vram.init		= nouveau_mem_detect;
+		engine->vram.takedown		= nouveau_stub_takedown;
 		engine->vram.flags_valid	= nouveau_mem_flags_valid;
 		break;
 	case 0x50:
@@ -366,6 +371,7 @@
 		else
 			engine->pm.temp_get	= nv40_temp_get;
 		engine->vram.init		= nv50_vram_init;
+		engine->vram.takedown		= nv50_vram_fini;
 		engine->vram.get		= nv50_vram_new;
 		engine->vram.put		= nv50_vram_del;
 		engine->vram.flags_valid	= nv50_vram_flags_valid;
@@ -412,6 +418,7 @@
 		engine->gpio.irq_unregister	= nv50_gpio_irq_unregister;
 		engine->gpio.irq_enable		= nv50_gpio_irq_enable;
 		engine->vram.init		= nvc0_vram_init;
+		engine->vram.takedown		= nv50_vram_fini;
 		engine->vram.get		= nvc0_vram_new;
 		engine->vram.put		= nv50_vram_del;
 		engine->vram.flags_valid	= nvc0_vram_flags_valid;
@@ -529,7 +536,7 @@
 
 	nouveau_pm_init(dev);
 
-	ret = nouveau_mem_vram_init(dev);
+	ret = engine->vram.init(dev);
 	if (ret)
 		goto out_bios;
 
@@ -541,10 +548,14 @@
 	if (ret)
 		goto out_gpuobj;
 
-	ret = nouveau_mem_gart_init(dev);
+	ret = nouveau_mem_vram_init(dev);
 	if (ret)
 		goto out_instmem;
 
+	ret = nouveau_mem_gart_init(dev);
+	if (ret)
+		goto out_ttmvram;
+
 	/* PMC */
 	ret = engine->mc.init(dev);
 	if (ret)
@@ -698,12 +709,14 @@
 	engine->mc.takedown(dev);
 out_gart:
 	nouveau_mem_gart_fini(dev);
+out_ttmvram:
+	nouveau_mem_vram_fini(dev);
 out_instmem:
 	engine->instmem.takedown(dev);
 out_gpuobj:
 	nouveau_gpuobj_takedown(dev);
 out_vram:
-	nouveau_mem_vram_fini(dev);
+	engine->vram.takedown(dev);
 out_bios:
 	nouveau_pm_fini(dev);
 	nouveau_bios_takedown(dev);
@@ -755,10 +768,11 @@
 	ttm_bo_clean_mm(&dev_priv->ttm.bdev, TTM_PL_TT);
 	mutex_unlock(&dev->struct_mutex);
 	nouveau_mem_gart_fini(dev);
+	nouveau_mem_vram_fini(dev);
 
 	engine->instmem.takedown(dev);
 	nouveau_gpuobj_takedown(dev);
-	nouveau_mem_vram_fini(dev);
+	engine->vram.takedown(dev);
 
 	nouveau_irq_fini(dev);
 	drm_vblank_cleanup(dev);
diff --git a/drivers/gpu/drm/nouveau/nv50_vram.c b/drivers/gpu/drm/nouveau/nv50_vram.c
index ffbc3d8..af32dae 100644
--- a/drivers/gpu/drm/nouveau/nv50_vram.c
+++ b/drivers/gpu/drm/nouveau/nv50_vram.c
@@ -51,9 +51,7 @@
 nv50_vram_del(struct drm_device *dev, struct nouveau_mem **pmem)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct ttm_bo_device *bdev = &dev_priv->ttm.bdev;
-	struct ttm_mem_type_manager *man = &bdev->man[TTM_PL_VRAM];
-	struct nouveau_mm *mm = man->priv;
+	struct nouveau_mm *mm = dev_priv->engine.vram.mm;
 	struct nouveau_mm_node *this;
 	struct nouveau_mem *mem;
 
@@ -84,9 +82,7 @@
 	      u32 memtype, struct nouveau_mem **pmem)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct ttm_bo_device *bdev = &dev_priv->ttm.bdev;
-	struct ttm_mem_type_manager *man = &bdev->man[TTM_PL_VRAM];
-	struct nouveau_mm *mm = man->priv;
+	struct nouveau_mm *mm = dev_priv->engine.vram.mm;
 	struct nouveau_mm_node *r;
 	struct nouveau_mem *mem;
 	int comp = (memtype & 0x300) >> 8;
@@ -190,22 +186,35 @@
 nv50_vram_init(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_vram_engine *vram = &dev_priv->engine.vram;
+	const u32 rsvd_head = ( 256 * 1024) >> 12; /* vga memory */
+	const u32 rsvd_tail = (1024 * 1024) >> 12; /* vbios etc */
+	u32 rblock, length;
 
 	dev_priv->vram_size  = nv_rd32(dev, 0x10020c);
 	dev_priv->vram_size |= (dev_priv->vram_size & 0xff) << 32;
 	dev_priv->vram_size &= 0xffffffff00ULL;
 
-	switch (dev_priv->chipset) {
-	case 0xaa:
-	case 0xac:
-	case 0xaf:
+	/* IGPs, no funky reordering happens here, they don't have VRAM */
+	if (dev_priv->chipset == 0xaa ||
+	    dev_priv->chipset == 0xac ||
+	    dev_priv->chipset == 0xaf) {
 		dev_priv->vram_sys_base = (u64)nv_rd32(dev, 0x100e10) << 12;
-		dev_priv->vram_rblock_size = 4096;
-		break;
-	default:
-		dev_priv->vram_rblock_size = nv50_vram_rblock(dev);
-		break;
+		rblock = 4096 >> 12;
+	} else {
+		rblock = nv50_vram_rblock(dev) >> 12;
 	}
 
-	return 0;
+	length = (dev_priv->vram_size >> 12) - rsvd_head - rsvd_tail;
+
+	return nouveau_mm_init(&vram->mm, rsvd_head, length, rblock);
+}
+
+void
+nv50_vram_fini(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_vram_engine *vram = &dev_priv->engine.vram;
+
+	nouveau_mm_fini(&vram->mm);
 }
diff --git a/drivers/gpu/drm/nouveau/nvc0_vram.c b/drivers/gpu/drm/nouveau/nvc0_vram.c
index 67c6ec6..e45a24d 100644
--- a/drivers/gpu/drm/nouveau/nvc0_vram.c
+++ b/drivers/gpu/drm/nouveau/nvc0_vram.c
@@ -61,9 +61,7 @@
 	      u32 type, struct nouveau_mem **pmem)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct ttm_bo_device *bdev = &dev_priv->ttm.bdev;
-	struct ttm_mem_type_manager *man = &bdev->man[TTM_PL_VRAM];
-	struct nouveau_mm *mm = man->priv;
+	struct nouveau_mm *mm = dev_priv->engine.vram.mm;
 	struct nouveau_mm_node *r;
 	struct nouveau_mem *mem;
 	int ret;
@@ -105,9 +103,15 @@
 nvc0_vram_init(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_vram_engine *vram = &dev_priv->engine.vram;
+	const u32 rsvd_head = ( 256 * 1024) >> 12; /* vga memory */
+	const u32 rsvd_tail = (1024 * 1024) >> 12; /* vbios etc */
+	u32 length;
 
 	dev_priv->vram_size  = nv_rd32(dev, 0x10f20c) << 20;
 	dev_priv->vram_size *= nv_rd32(dev, 0x121c74);
-	dev_priv->vram_rblock_size = 4096;
-	return 0;
+
+	length = (dev_priv->vram_size >> 12) - rsvd_head - rsvd_tail;
+
+	return nouveau_mm_init(&vram->mm, rsvd_head, length, 1);
 }