drm/nouveau: rework init ordering so nv50_instmem.c can be less bad

Reviewed-by: Francisco Jerez <currojerez@riseup.net>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
diff --git a/drivers/gpu/drm/nouveau/nv50_instmem.c b/drivers/gpu/drm/nouveau/nv50_instmem.c
index 5c617f8..d932594 100644
--- a/drivers/gpu/drm/nouveau/nv50_instmem.c
+++ b/drivers/gpu/drm/nouveau/nv50_instmem.c
@@ -37,27 +37,82 @@
 	struct nouveau_gpuobj *fb_bar;
 };
 
-#define NV50_INSTMEM_PAGE_SHIFT 12
-#define NV50_INSTMEM_PAGE_SIZE  (1 << NV50_INSTMEM_PAGE_SHIFT)
-#define NV50_INSTMEM_PT_SIZE(a)	(((a) >> 12) << 3)
+static void
+nv50_channel_del(struct nouveau_channel **pchan)
+{
+	struct nouveau_channel *chan;
 
-/*NOTE: - Assumes 0x1700 already covers the correct MiB of PRAMIN
- */
-#define BAR0_WI32(g, o, v) do {                                   \
-	u32 offset = (g)->vinst + (o);                            \
-	nv_wr32(dev, NV_RAMIN + (offset & 0xfffff), (v));         \
-} while (0)
+	chan = *pchan;
+	*pchan = NULL;
+	if (!chan)
+		return;
+
+	nouveau_gpuobj_ref(NULL, &chan->ramfc);
+	nouveau_gpuobj_ref(NULL, &chan->vm_pd);
+	if (chan->ramin_heap.free_stack.next)
+		drm_mm_takedown(&chan->ramin_heap);
+	nouveau_gpuobj_ref(NULL, &chan->ramin);
+	kfree(chan);
+}
+
+static int
+nv50_channel_new(struct drm_device *dev, u32 size,
+		 struct nouveau_channel **pchan)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	u32 pgd = (dev_priv->chipset == 0x50) ? 0x1400 : 0x0200;
+	u32  fc = (dev_priv->chipset == 0x50) ? 0x0000 : 0x4200;
+	struct nouveau_channel *chan;
+	int ret;
+
+	chan = kzalloc(sizeof(*chan), GFP_KERNEL);
+	if (!chan)
+		return -ENOMEM;
+	chan->dev = dev;
+
+	ret = nouveau_gpuobj_new(dev, NULL, size, 0x1000, 0, &chan->ramin);
+	if (ret) {
+		nv50_channel_del(&chan);
+		return ret;
+	}
+
+	ret = drm_mm_init(&chan->ramin_heap, 0x6000, chan->ramin->size);
+	if (ret) {
+		nv50_channel_del(&chan);
+		return ret;
+	}
+
+	ret = nouveau_gpuobj_new_fake(dev, chan->ramin->pinst == ~0 ? ~0 :
+				      chan->ramin->pinst + pgd,
+				      chan->ramin->vinst + pgd,
+				      0x4000, NVOBJ_FLAG_ZERO_ALLOC,
+				      &chan->vm_pd);
+	if (ret) {
+		nv50_channel_del(&chan);
+		return ret;
+	}
+
+	ret = nouveau_gpuobj_new_fake(dev, chan->ramin->pinst == ~0 ? ~0 :
+				      chan->ramin->pinst + fc,
+				      chan->ramin->vinst + fc, 0x100,
+				      NVOBJ_FLAG_ZERO_ALLOC, &chan->ramfc);
+	if (ret) {
+		nv50_channel_del(&chan);
+		return ret;
+	}
+
+	*pchan = chan;
+	return 0;
+}
 
 int
 nv50_instmem_init(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_channel *chan;
-	uint32_t c_offset, c_size, c_ramfc, c_vmpd, c_base, pt_size;
-	uint32_t save_nv001700;
-	uint64_t v;
 	struct nv50_instmem_priv *priv;
+	struct nouveau_channel *chan;
 	int ret, i;
+	u32 tmp;
 
 	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
 	if (!priv)
@@ -68,27 +123,61 @@
 	for (i = 0x1700; i <= 0x1710; i += 4)
 		priv->save1700[(i-0x1700)/4] = nv_rd32(dev, i);
 
-	/* Reserve the last MiB of VRAM, we should probably try to avoid
-	 * setting up the below tables over the top of the VBIOS image at
-	 * some point.
-	 */
-	dev_priv->ramin_rsvd_vram = 1 << 20;
-	c_offset = dev_priv->vram_size - dev_priv->ramin_rsvd_vram;
-	c_size   = 128 << 10;
-	c_vmpd   = ((dev_priv->chipset & 0xf0) == 0x50) ? 0x1400 : 0x200;
-	c_ramfc  = ((dev_priv->chipset & 0xf0) == 0x50) ? 0x0 : 0x20;
-	c_base   = c_vmpd + 0x4000;
-	pt_size  = NV50_INSTMEM_PT_SIZE(dev_priv->ramin_size);
+	/* Global PRAMIN heap */
+	ret = drm_mm_init(&dev_priv->ramin_heap, 0, dev_priv->ramin_size);
+	if (ret) {
+		NV_ERROR(dev, "Failed to init RAMIN heap\n");
+		return -ENOMEM;
+	}
 
-	NV_DEBUG(dev, " Rsvd VRAM base: 0x%08x\n", c_offset);
-	NV_DEBUG(dev, "    VBIOS image: 0x%08x\n",
-				(nv_rd32(dev, 0x619f04) & ~0xff) << 8);
-	NV_DEBUG(dev, "  Aperture size: %d MiB\n", dev_priv->ramin_size >> 20);
-	NV_DEBUG(dev, "        PT size: %d KiB\n", pt_size >> 10);
+	/* we need a channel to plug into the hw to control the BARs */
+	ret = nv50_channel_new(dev, 128*1024, &dev_priv->fifos[0]);
+	if (ret)
+		return ret;
+	chan = dev_priv->fifos[127] = dev_priv->fifos[0];
 
-	/* Determine VM layout, we need to do this first to make sure
-	 * we allocate enough memory for all the page tables.
-	 */
+	/* allocate page table for PRAMIN BAR */
+	ret = nouveau_gpuobj_new(dev, chan, (dev_priv->ramin_size >> 12) * 8,
+				 0x1000, NVOBJ_FLAG_ZERO_ALLOC,
+				 &priv->pramin_pt);
+	if (ret)
+		return ret;
+
+	nv_wo32(chan->vm_pd, 0x0000, priv->pramin_pt->vinst | 0x63);
+	nv_wo32(chan->vm_pd, 0x0004, 0);
+
+	/* DMA object for PRAMIN BAR */
+	ret = nouveau_gpuobj_new(dev, chan, 6*4, 16, 0, &priv->pramin_bar);
+	if (ret)
+		return ret;
+	nv_wo32(priv->pramin_bar, 0x00, 0x7fc00000);
+	nv_wo32(priv->pramin_bar, 0x04, dev_priv->ramin_size - 1);
+	nv_wo32(priv->pramin_bar, 0x08, 0x00000000);
+	nv_wo32(priv->pramin_bar, 0x0c, 0x00000000);
+	nv_wo32(priv->pramin_bar, 0x10, 0x00000000);
+	nv_wo32(priv->pramin_bar, 0x14, 0x00000000);
+
+	/* map channel into PRAMIN, gpuobj didn't do it for us */
+	ret = nv50_instmem_bind(dev, chan->ramin);
+	if (ret)
+		return ret;
+
+	/* poke regs... */
+	nv_wr32(dev, 0x001704, 0x00000000 | (chan->ramin->vinst >> 12));
+	nv_wr32(dev, 0x001704, 0x40000000 | (chan->ramin->vinst >> 12));
+	nv_wr32(dev, 0x00170c, 0x80000000 | (priv->pramin_bar->cinst >> 4));
+
+	tmp = nv_ri32(dev, 0);
+	nv_wi32(dev, 0, ~tmp);
+	if (nv_ri32(dev, 0) != ~tmp) {
+		NV_ERROR(dev, "PRAMIN readback failed\n");
+		return -EIO;
+	}
+	nv_wi32(dev, 0, tmp);
+
+	dev_priv->ramin_available = true;
+
+	/* Determine VM layout */
 	dev_priv->vm_gart_base = roundup(NV50_VM_BLOCK, NV50_VM_BLOCK);
 	dev_priv->vm_gart_size = NV50_VM_BLOCK;
 
@@ -108,166 +197,39 @@
 		 dev_priv->vm_vram_base,
 		 dev_priv->vm_vram_base + dev_priv->vm_vram_size - 1);
 
-	c_size += dev_priv->vm_vram_pt_nr * (NV50_VM_BLOCK / 65536 * 8);
-
-	/* Map BAR0 PRAMIN aperture over the memory we want to use */
-	save_nv001700 = nv_rd32(dev, NV50_PUNK_BAR0_PRAMIN);
-	nv_wr32(dev, NV50_PUNK_BAR0_PRAMIN, (c_offset >> 16));
-
-	/* Create a fake channel, and use it as our "dummy" channels 0/127.
-	 * The main reason for creating a channel is so we can use the gpuobj
-	 * code.  However, it's probably worth noting that NVIDIA also setup
-	 * their channels 0/127 with the same values they configure here.
-	 * So, there may be some other reason for doing this.
-	 *
-	 * Have to create the entire channel manually, as the real channel
-	 * creation code assumes we have PRAMIN access, and we don't until
-	 * we're done here.
-	 */
-	chan = kzalloc(sizeof(*chan), GFP_KERNEL);
-	if (!chan)
-		return -ENOMEM;
-	chan->id = 0;
-	chan->dev = dev;
-	chan->file_priv = (struct drm_file *)-2;
-	dev_priv->fifos[0] = dev_priv->fifos[127] = chan;
-
-	INIT_LIST_HEAD(&chan->ramht_refs);
-
-	/* Channel's PRAMIN object + heap */
-	ret = nouveau_gpuobj_new_fake(dev, 0, c_offset, c_size, 0, &chan->ramin);
-	if (ret)
-		return ret;
-
-	if (drm_mm_init(&chan->ramin_heap, c_base, c_size - c_base))
-		return -ENOMEM;
-
-	/* RAMFC + zero channel's PRAMIN up to start of VM pagedir */
-	ret = nouveau_gpuobj_new_fake(dev, c_ramfc, c_offset + c_ramfc,
-				      0x4000, 0, &chan->ramfc);
-	if (ret)
-		return ret;
-
-	for (i = 0; i < c_vmpd; i += 4)
-		BAR0_WI32(chan->ramin, i, 0);
-
-	/* VM page directory */
-	ret = nouveau_gpuobj_new_fake(dev, c_vmpd, c_offset + c_vmpd,
-				      0x4000, 0, &chan->vm_pd);
-	if (ret)
-		return ret;
-	for (i = 0; i < 0x4000; i += 8) {
-		BAR0_WI32(chan->vm_pd, i + 0x00, 0x00000000);
-		BAR0_WI32(chan->vm_pd, i + 0x04, 0x00000000);
-	}
-
-	/* PRAMIN page table, cheat and map into VM at 0x0000000000.
-	 * We map the entire fake channel into the start of the PRAMIN BAR
-	 */
-	ret = nouveau_gpuobj_new(dev, chan, pt_size, 0x1000, 0,
-				 &priv->pramin_pt);
-	if (ret)
-		return ret;
-
-	v = c_offset | 1;
-	if (dev_priv->vram_sys_base) {
-		v += dev_priv->vram_sys_base;
-		v |= 0x30;
-	}
-
-	i = 0;
-	while (v < dev_priv->vram_sys_base + c_offset + c_size) {
-		BAR0_WI32(priv->pramin_pt, i + 0, lower_32_bits(v));
-		BAR0_WI32(priv->pramin_pt, i + 4, upper_32_bits(v));
-		v += 0x1000;
-		i += 8;
-	}
-
-	while (i < pt_size) {
-		BAR0_WI32(priv->pramin_pt, i + 0, 0x00000000);
-		BAR0_WI32(priv->pramin_pt, i + 4, 0x00000000);
-		i += 8;
-	}
-
-	BAR0_WI32(chan->vm_pd, 0x00, priv->pramin_pt->vinst | 0x63);
-	BAR0_WI32(chan->vm_pd, 0x04, 0x00000000);
-
 	/* VRAM page table(s), mapped into VM at +1GiB  */
 	for (i = 0; i < dev_priv->vm_vram_pt_nr; i++) {
-		ret = nouveau_gpuobj_new(dev, chan, NV50_VM_BLOCK / 0x10000 * 8,
-					 0, 0, &chan->vm_vram_pt[i]);
+		ret = nouveau_gpuobj_new(dev, NULL, NV50_VM_BLOCK / 0x10000 * 8,
+					 0, NVOBJ_FLAG_ZERO_ALLOC,
+					 &chan->vm_vram_pt[i]);
 		if (ret) {
-			NV_ERROR(dev, "Error creating VRAM page tables: %d\n",
-									ret);
+			NV_ERROR(dev, "Error creating VRAM PGT: %d\n", ret);
 			dev_priv->vm_vram_pt_nr = i;
 			return ret;
 		}
-		/*XXX: double-check this is ok */
 		dev_priv->vm_vram_pt[i] = chan->vm_vram_pt[i];
 
-		for (v = 0; v < dev_priv->vm_vram_pt[i]->size; v += 4)
-			BAR0_WI32(dev_priv->vm_vram_pt[i], v, 0);
-
-		BAR0_WI32(chan->vm_pd, 0x10 + (i*8),
-			  chan->vm_vram_pt[i]->vinst | 0x61);
-		BAR0_WI32(chan->vm_pd, 0x14 + (i*8), 0);
+		nv_wo32(chan->vm_pd, 0x10 + (i*8),
+			chan->vm_vram_pt[i]->vinst | 0x61);
+		nv_wo32(chan->vm_pd, 0x14 + (i*8), 0);
 	}
 
-	/* DMA object for PRAMIN BAR */
-	ret = nouveau_gpuobj_new(dev, chan, 6*4, 16, 0, &priv->pramin_bar);
-	if (ret)
-		return ret;
-	BAR0_WI32(priv->pramin_bar, 0x00, 0x7fc00000);
-	BAR0_WI32(priv->pramin_bar, 0x04, dev_priv->ramin_size - 1);
-	BAR0_WI32(priv->pramin_bar, 0x08, 0x00000000);
-	BAR0_WI32(priv->pramin_bar, 0x0c, 0x00000000);
-	BAR0_WI32(priv->pramin_bar, 0x10, 0x00000000);
-	BAR0_WI32(priv->pramin_bar, 0x14, 0x00000000);
-
 	/* DMA object for FB BAR */
 	ret = nouveau_gpuobj_new(dev, chan, 6*4, 16, 0, &priv->fb_bar);
 	if (ret)
 		return ret;
-	BAR0_WI32(priv->fb_bar, 0x00, 0x7fc00000);
-	BAR0_WI32(priv->fb_bar, 0x04, 0x40000000 +
-				      pci_resource_len(dev->pdev, 1) - 1);
-	BAR0_WI32(priv->fb_bar, 0x08, 0x40000000);
-	BAR0_WI32(priv->fb_bar, 0x0c, 0x00000000);
-	BAR0_WI32(priv->fb_bar, 0x10, 0x00000000);
-	BAR0_WI32(priv->fb_bar, 0x14, 0x00000000);
+	nv_wo32(priv->fb_bar, 0x00, 0x7fc00000);
+	nv_wo32(priv->fb_bar, 0x04, 0x40000000 +
+				    pci_resource_len(dev->pdev, 1) - 1);
+	nv_wo32(priv->fb_bar, 0x08, 0x40000000);
+	nv_wo32(priv->fb_bar, 0x0c, 0x00000000);
+	nv_wo32(priv->fb_bar, 0x10, 0x00000000);
+	nv_wo32(priv->fb_bar, 0x14, 0x00000000);
 
-	/* Poke the relevant regs, and pray it works :) */
-	nv_wr32(dev, NV50_PUNK_BAR_CFG_BASE, (chan->ramin->vinst >> 12));
-	nv_wr32(dev, NV50_PUNK_UNK1710, 0);
-	nv_wr32(dev, NV50_PUNK_BAR_CFG_BASE, (chan->ramin->vinst >> 12) |
-					 NV50_PUNK_BAR_CFG_BASE_VALID);
-	nv_wr32(dev, NV50_PUNK_BAR1_CTXDMA, (priv->fb_bar->cinst >> 4) |
-					NV50_PUNK_BAR1_CTXDMA_VALID);
-	nv_wr32(dev, NV50_PUNK_BAR3_CTXDMA, (priv->pramin_bar->cinst >> 4) |
-					NV50_PUNK_BAR3_CTXDMA_VALID);
-
+	nv_wr32(dev, 0x001708, 0x80000000 | (priv->fb_bar->cinst >> 4));
 	for (i = 0; i < 8; i++)
 		nv_wr32(dev, 0x1900 + (i*4), 0);
 
-	dev_priv->ramin_available = true;
-
-	/* Assume that praying isn't enough, check that we can re-read the
-	 * entire fake channel back from the PRAMIN BAR */
-	for (i = 0; i < c_size; i += 4) {
-		if (nv_rd32(dev, NV_RAMIN + i) != nv_ri32(dev, i)) {
-			NV_ERROR(dev, "Error reading back PRAMIN at 0x%08x\n",
-									i);
-			return -EINVAL;
-		}
-	}
-
-	nv_wr32(dev, NV50_PUNK_BAR0_PRAMIN, save_nv001700);
-
-	/* Global PRAMIN heap */
-	if (drm_mm_init(&dev_priv->ramin_heap, c_size, dev_priv->ramin_size - c_size)) {
-		NV_ERROR(dev, "Failed to init RAMIN heap\n");
-	}
-
 	/*XXX: incorrect, but needed to make hash func "work" */
 	dev_priv->ramht_offset = 0x10000;
 	dev_priv->ramht_bits   = 9;
@@ -288,6 +250,8 @@
 	if (!priv)
 		return;
 
+	dev_priv->ramin_available = false;
+
 	/* Restore state from before init */
 	for (i = 0x1700; i <= 0x1710; i += 4)
 		nv_wr32(dev, i, priv->save1700[(i - 0x1700) / 4]);
@@ -302,13 +266,8 @@
 			nouveau_gpuobj_ref(NULL, &chan->vm_vram_pt[i]);
 		dev_priv->vm_vram_pt_nr = 0;
 
-		nouveau_gpuobj_ref(NULL, &chan->vm_pd);
-		nouveau_gpuobj_ref(NULL, &chan->ramfc);
-		nouveau_gpuobj_ref(NULL, &chan->ramin);
-		drm_mm_takedown(&chan->ramin_heap);
-
-		dev_priv->fifos[0] = dev_priv->fifos[127] = NULL;
-		kfree(chan);
+		nv50_channel_del(&dev_priv->fifos[0]);
+		dev_priv->fifos[127] = NULL;
 	}
 
 	dev_priv->engine.instmem.priv = NULL;
@@ -341,9 +300,11 @@
 	struct nouveau_gpuobj *ramin = chan->ramin;
 	int i;
 
-	nv_wr32(dev, NV50_PUNK_BAR0_PRAMIN, (ramin->vinst >> 16));
+	dev_priv->ramin_available = false;
+	dev_priv->ramin_base = ~0;
 	for (i = 0; i < ramin->size; i += 4)
-		BAR0_WI32(ramin, i, ramin->im_backing_suspend[i/4]);
+		nv_wo32(ramin, i, ramin->im_backing_suspend[i/4]);
+	dev_priv->ramin_available = true;
 	vfree(ramin->im_backing_suspend);
 	ramin->im_backing_suspend = NULL;
 
@@ -370,7 +331,7 @@
 	if (gpuobj->im_backing)
 		return -EINVAL;
 
-	*sz = ALIGN(*sz, NV50_INSTMEM_PAGE_SIZE);
+	*sz = ALIGN(*sz, 4096);
 	if (*sz == 0)
 		return -EINVAL;
 
@@ -438,7 +399,7 @@
 	while (pte < pte_end) {
 		nv_wo32(pramin_pt, (pte * 4) + 0, lower_32_bits(vram));
 		nv_wo32(pramin_pt, (pte * 4) + 4, upper_32_bits(vram));
-		vram += NV50_INSTMEM_PAGE_SIZE;
+		vram += 0x1000;
 		pte += 2;
 	}
 	dev_priv->engine.instmem.flush(dev);
@@ -460,6 +421,10 @@
 	if (gpuobj->im_bound == 0)
 		return -EINVAL;
 
+	/* can happen during late takedown */
+	if (unlikely(!dev_priv->ramin_available))
+		return 0;
+
 	pte     = (gpuobj->im_pramin->start >> 12) << 1;
 	pte_end = ((gpuobj->im_pramin->size >> 12) << 1) + pte;