drm: rework radeon memory map (radeon 1.23)

This code reworks the radeon memory map so it works better
for newer r300 chips and for a lot of older PCI chips.

It really requires a new X driver in order to take advantage of this code.

From: Ben Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Dave Airlie <airlied@linux.ie>
diff --git a/drivers/char/drm/r300_cmdbuf.c b/drivers/char/drm/r300_cmdbuf.c
index 9fbadb9..20b6cb3 100644
--- a/drivers/char/drm/r300_cmdbuf.c
+++ b/drivers/char/drm/r300_cmdbuf.c
@@ -242,8 +242,10 @@
 	return 0;
 }
 
-  /* we expect offsets passed to the framebuffer to be either within video memory or
-     within AGP space */
+/*
+ * we expect offsets passed to the framebuffer to be either within video 
+ * memory or within AGP space 
+ */
 static __inline__ int r300_check_offset(drm_radeon_private_t *dev_priv,
 					u32 offset)
 {
@@ -251,11 +253,11 @@
 	   but this value is not being kept.
 	   This code is correct for now (does the same thing as the
 	   code that sets MC_FB_LOCATION) in radeon_cp.c */
-	if ((offset >= dev_priv->fb_location) &&
-	    (offset < dev_priv->gart_vm_start))
+	if (offset >= dev_priv->fb_location &&
+	    offset < (dev_priv->fb_location + dev_priv->fb_size))
 		return 0;
-	if ((offset >= dev_priv->gart_vm_start) &&
-	    (offset < dev_priv->gart_vm_start + dev_priv->gart_size))
+	if (offset >= dev_priv->gart_vm_start &&
+	    offset < (dev_priv->gart_vm_start + dev_priv->gart_size))
 		return 0;
 	return 1;
 }
@@ -490,6 +492,7 @@
 
 	return 0;
 }
+
 static __inline__ int r300_emit_bitblt_multi(drm_radeon_private_t *dev_priv,
 					     drm_radeon_kcmd_buffer_t *cmdbuf)
 {
diff --git a/drivers/char/drm/radeon_cp.c b/drivers/char/drm/radeon_cp.c
index 9bb8ae0..cc4942a 100644
--- a/drivers/char/drm/radeon_cp.c
+++ b/drivers/char/drm/radeon_cp.c
@@ -1118,14 +1118,20 @@
 {
 	u32 ring_start, cur_read_ptr;
 	u32 tmp;
-
-	/* Initialize the memory controller */
-	RADEON_WRITE(RADEON_MC_FB_LOCATION,
-		     ((dev_priv->gart_vm_start - 1) & 0xffff0000)
-		     | (dev_priv->fb_location >> 16));
+	
+	/* Initialize the memory controller. With new memory map, the fb location
+	 * is not changed, it should have been properly initialized already. Part
+	 * of the problem is that the code below is bogus, assuming the GART is
+	 * always appended to the fb which is not necessarily the case
+	 */
+	if (!dev_priv->new_memmap)
+		RADEON_WRITE(RADEON_MC_FB_LOCATION,
+			     ((dev_priv->gart_vm_start - 1) & 0xffff0000)
+			     | (dev_priv->fb_location >> 16));
 
 #if __OS_HAS_AGP
 	if (dev_priv->flags & CHIP_IS_AGP) {
+		RADEON_WRITE(RADEON_AGP_BASE, (unsigned int)dev->agp->base);
 		RADEON_WRITE(RADEON_MC_AGP_LOCATION,
 			     (((dev_priv->gart_vm_start - 1 +
 				dev_priv->gart_size) & 0xffff0000) |
@@ -1153,8 +1159,6 @@
 
 #if __OS_HAS_AGP
 	if (dev_priv->flags & CHIP_IS_AGP) {
-		/* set RADEON_AGP_BASE here instead of relying on X from user space */
-		RADEON_WRITE(RADEON_AGP_BASE, (unsigned int)dev->agp->base);
 		RADEON_WRITE(RADEON_CP_RB_RPTR_ADDR,
 			     dev_priv->ring_rptr->offset
 			     - dev->agp->base + dev_priv->gart_vm_start);
@@ -1174,6 +1178,17 @@
 			  entry->handle + tmp_ofs);
 	}
 
+	/* Set ring buffer size */
+#ifdef __BIG_ENDIAN
+	RADEON_WRITE(RADEON_CP_RB_CNTL,
+		     dev_priv->ring.size_l2qw | RADEON_BUF_SWAP_32BIT);
+#else
+	RADEON_WRITE(RADEON_CP_RB_CNTL, dev_priv->ring.size_l2qw);
+#endif
+
+	/* Start with assuming that writeback doesn't work */
+	dev_priv->writeback_works = 0;
+
 	/* Initialize the scratch register pointer.  This will cause
 	 * the scratch register values to be written out to memory
 	 * whenever they are updated.
@@ -1190,7 +1205,38 @@
 
 	RADEON_WRITE(RADEON_SCRATCH_UMSK, 0x7);
 
-	/* Writeback doesn't seem to work everywhere, test it first */
+	/* Turn on bus mastering */
+	tmp = RADEON_READ(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS;
+	RADEON_WRITE(RADEON_BUS_CNTL, tmp);
+
+	dev_priv->sarea_priv->last_frame = dev_priv->scratch[0] = 0;
+	RADEON_WRITE(RADEON_LAST_FRAME_REG, dev_priv->sarea_priv->last_frame);
+
+	dev_priv->sarea_priv->last_dispatch = dev_priv->scratch[1] = 0;
+	RADEON_WRITE(RADEON_LAST_DISPATCH_REG,
+		     dev_priv->sarea_priv->last_dispatch);
+
+	dev_priv->sarea_priv->last_clear = dev_priv->scratch[2] = 0;
+	RADEON_WRITE(RADEON_LAST_CLEAR_REG, dev_priv->sarea_priv->last_clear);
+
+	radeon_do_wait_for_idle(dev_priv);
+
+	/* Sync everything up */
+	RADEON_WRITE(RADEON_ISYNC_CNTL,
+		     (RADEON_ISYNC_ANY2D_IDLE3D |
+		      RADEON_ISYNC_ANY3D_IDLE2D |
+		      RADEON_ISYNC_WAIT_IDLEGUI |
+		      RADEON_ISYNC_CPSCRATCH_IDLEGUI));
+
+}
+
+static void radeon_test_writeback(drm_radeon_private_t * dev_priv)
+{
+	u32 tmp;
+
+	/* Writeback doesn't seem to work everywhere, test it here and possibly
+	 * enable it if it appears to work
+	 */
 	DRM_WRITE32(dev_priv->ring_rptr, RADEON_SCRATCHOFF(1), 0);
 	RADEON_WRITE(RADEON_SCRATCH_REG1, 0xdeadbeef);
 
@@ -1203,46 +1249,15 @@
 
 	if (tmp < dev_priv->usec_timeout) {
 		dev_priv->writeback_works = 1;
-		DRM_DEBUG("writeback test succeeded, tmp=%d\n", tmp);
+		DRM_INFO("writeback test succeeded in %d usecs\n", tmp);
 	} else {
 		dev_priv->writeback_works = 0;
-		DRM_DEBUG("writeback test failed\n");
+		DRM_INFO("writeback test failed\n");
 	}
 	if (radeon_no_wb == 1) {
 		dev_priv->writeback_works = 0;
-		DRM_DEBUG("writeback forced off\n");
+		DRM_INFO("writeback forced off\n");
 	}
-
-	dev_priv->sarea_priv->last_frame = dev_priv->scratch[0] = 0;
-	RADEON_WRITE(RADEON_LAST_FRAME_REG, dev_priv->sarea_priv->last_frame);
-
-	dev_priv->sarea_priv->last_dispatch = dev_priv->scratch[1] = 0;
-	RADEON_WRITE(RADEON_LAST_DISPATCH_REG,
-		     dev_priv->sarea_priv->last_dispatch);
-
-	dev_priv->sarea_priv->last_clear = dev_priv->scratch[2] = 0;
-	RADEON_WRITE(RADEON_LAST_CLEAR_REG, dev_priv->sarea_priv->last_clear);
-
-	/* Set ring buffer size */
-#ifdef __BIG_ENDIAN
-	RADEON_WRITE(RADEON_CP_RB_CNTL,
-		     dev_priv->ring.size_l2qw | RADEON_BUF_SWAP_32BIT);
-#else
-	RADEON_WRITE(RADEON_CP_RB_CNTL, dev_priv->ring.size_l2qw);
-#endif
-
-	radeon_do_wait_for_idle(dev_priv);
-
-	/* Turn on bus mastering */
-	tmp = RADEON_READ(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS;
-	RADEON_WRITE(RADEON_BUS_CNTL, tmp);
-
-	/* Sync everything up */
-	RADEON_WRITE(RADEON_ISYNC_CNTL,
-		     (RADEON_ISYNC_ANY2D_IDLE3D |
-		      RADEON_ISYNC_ANY3D_IDLE2D |
-		      RADEON_ISYNC_WAIT_IDLEGUI |
-		      RADEON_ISYNC_CPSCRATCH_IDLEGUI));
 }
 
 /* Enable or disable PCI-E GART on the chip */
@@ -1496,6 +1511,9 @@
 
 	dev_priv->fb_location = (RADEON_READ(RADEON_MC_FB_LOCATION)
 				 & 0xffff) << 16;
+	dev_priv->fb_size = 
+		((RADEON_READ(RADEON_MC_FB_LOCATION) & 0xffff0000u) + 0x10000)
+		- dev_priv->fb_location;
 
 	dev_priv->front_pitch_offset = (((dev_priv->front_pitch / 64) << 22) |
 					((dev_priv->front_offset
@@ -1510,8 +1528,46 @@
 					  + dev_priv->fb_location) >> 10));
 
 	dev_priv->gart_size = init->gart_size;
-	dev_priv->gart_vm_start = dev_priv->fb_location
-	    + RADEON_READ(RADEON_CONFIG_APER_SIZE);
+
+	/* New let's set the memory map ... */
+	if (dev_priv->new_memmap) {
+		u32 base = 0;
+
+		DRM_INFO("Setting GART location based on new memory map\n");
+
+		/* If using AGP, try to locate the AGP aperture at the same
+		 * location in the card and on the bus, though we have to
+		 * align it down.
+		 */
+#if __OS_HAS_AGP
+		if (dev_priv->flags & CHIP_IS_AGP) {
+			base = dev->agp->base;
+			/* Check if valid */
+			if ((base + dev_priv->gart_size) > dev_priv->fb_location &&
+			    base < (dev_priv->fb_location + dev_priv->fb_size)) {
+				DRM_INFO("Can't use AGP base @0x%08lx, won't fit\n",
+					 dev->agp->base);
+				base = 0;
+			}
+		}
+#endif
+		/* If not or if AGP is at 0 (Macs), try to put it elsewhere */
+		if (base == 0) {
+			base = dev_priv->fb_location + dev_priv->fb_size;
+			if (((base + dev_priv->gart_size) & 0xfffffffful)
+			    < base)
+				base = dev_priv->fb_location
+					- dev_priv->gart_size;
+		}		
+		dev_priv->gart_vm_start = base & 0xffc00000u;
+		if (dev_priv->gart_vm_start != base)
+			DRM_INFO("GART aligned down from 0x%08x to 0x%08x\n",
+				 base, dev_priv->gart_vm_start);
+	} else {
+		DRM_INFO("Setting GART location based on old memory map\n");
+		dev_priv->gart_vm_start = dev_priv->fb_location +
+			RADEON_READ(RADEON_CONFIG_APER_SIZE);
+	}
 
 #if __OS_HAS_AGP
 	if (dev_priv->flags & CHIP_IS_AGP)
@@ -1596,6 +1652,7 @@
 	dev_priv->last_buf = 0;
 
 	radeon_do_engine_reset(dev);
+	radeon_test_writeback(dev_priv);
 
 	return 0;
 }
diff --git a/drivers/char/drm/radeon_drm.h b/drivers/char/drm/radeon_drm.h
index 9c177a6..bb63e68 100644
--- a/drivers/char/drm/radeon_drm.h
+++ b/drivers/char/drm/radeon_drm.h
@@ -697,6 +697,7 @@
 #define RADEON_SETPARAM_FB_LOCATION    1	/* determined framebuffer location */
 #define RADEON_SETPARAM_SWITCH_TILING  2	/* enable/disable color tiling */
 #define RADEON_SETPARAM_PCIGART_LOCATION 3	/* PCI Gart Location */
+#define RADEON_SETPARAM_NEW_MEMMAP 4		/* Use new memory map */
 
 /* 1.14: Clients can allocate/free a surface
  */
diff --git a/drivers/char/drm/radeon_drv.h b/drivers/char/drm/radeon_drv.h
index 1f7d2ab..a0c1988 100644
--- a/drivers/char/drm/radeon_drv.h
+++ b/drivers/char/drm/radeon_drv.h
@@ -38,7 +38,7 @@
 
 #define DRIVER_NAME		"radeon"
 #define DRIVER_DESC		"ATI Radeon"
-#define DRIVER_DATE		"20051229"
+#define DRIVER_DATE		"20060225"
 
 /* Interface history:
  *
@@ -91,9 +91,10 @@
  * 1.20- Add support for r300 texrect
  * 1.21- Add support for card type getparam
  * 1.22- Add support for texture cache flushes (R300_TX_CNTL)
+ * 1.23- Add new radeon memory map work from benh
  */
 #define DRIVER_MAJOR		1
-#define DRIVER_MINOR		22
+#define DRIVER_MINOR		23
 #define DRIVER_PATCHLEVEL	0
 
 /*
@@ -138,7 +139,8 @@
 	CHIP_IS_PCIE = 0x00200000UL,
 };
 
-#define GET_RING_HEAD(dev_priv)		DRM_READ32(  (dev_priv)->ring_rptr, 0 )
+#define GET_RING_HEAD(dev_priv)	(dev_priv->writeback_works ? \
+        DRM_READ32(  (dev_priv)->ring_rptr, 0 ) : RADEON_READ(RADEON_CP_RB_RPTR))
 #define SET_RING_HEAD(dev_priv,val)	DRM_WRITE32( (dev_priv)->ring_rptr, 0, (val) )
 
 typedef struct drm_radeon_freelist {
@@ -199,6 +201,8 @@
 	drm_radeon_sarea_t *sarea_priv;
 
 	u32 fb_location;
+	u32 fb_size;
+	int new_memmap;
 
 	int gart_size;
 	u32 gart_vm_start;
diff --git a/drivers/char/drm/radeon_state.c b/drivers/char/drm/radeon_state.c
index 7bc2751..56e56b8 100644
--- a/drivers/char/drm/radeon_state.c
+++ b/drivers/char/drm/radeon_state.c
@@ -45,22 +45,53 @@
 	u32 off = *offset;
 	struct drm_radeon_driver_file_fields *radeon_priv;
 
-	if (off >= dev_priv->fb_location &&
-	    off < (dev_priv->gart_vm_start + dev_priv->gart_size))
+	/* Hrm ... the story of the offset ... So this function converts
+	 * the various ideas of what userland clients might have for an
+	 * offset in the card address space into an offset into the card
+	 * address space :) So with a sane client, it should just keep
+	 * the value intact and just do some boundary checking. However,
+	 * not all clients are sane. Some older clients pass us 0 based
+	 * offsets relative to the start of the framebuffer and some may
+	 * assume the AGP aperture it appended to the framebuffer, so we
+	 * try to detect those cases and fix them up.
+	 *
+	 * Note: It might be a good idea here to make sure the offset lands
+	 * in some "allowed" area to protect things like the PCIE GART...
+	 */
+
+	/* First, the best case, the offset already lands in either the
+	 * framebuffer or the GART mapped space
+	 */
+	if ((off >= dev_priv->fb_location &&
+	     off < (dev_priv->fb_location + dev_priv->fb_size)) ||
+	    (off >= dev_priv->gart_vm_start &&
+	     off < (dev_priv->gart_vm_start + dev_priv->gart_size)))
 		return 0;
 
-	radeon_priv = filp_priv->driver_priv;
-	off += radeon_priv->radeon_fb_delta;
+	/* Ok, that didn't happen... now check if we have a zero based
+	 * offset that fits in the framebuffer + gart space, apply the
+	 * magic offset we get from SETPARAM or calculated from fb_location
+	 */
+	if (off < (dev_priv->fb_size + dev_priv->gart_size)) {
+		radeon_priv = filp_priv->driver_priv;
+		off += radeon_priv->radeon_fb_delta;
+	}
 
-	DRM_DEBUG("offset fixed up to 0x%x\n", off);
+	/* Finally, assume we aimed at a GART offset if beyond the fb */
+	if (off > (dev_priv->fb_location + dev_priv->fb_size))
+		off = off - (dev_priv->fb_location + dev_priv->fb_size) +
+			dev_priv->gart_vm_start;
 
-	if (off < dev_priv->fb_location ||
-	    off >= (dev_priv->gart_vm_start + dev_priv->gart_size))
-		return DRM_ERR(EINVAL);
-
-	*offset = off;
-
-	return 0;
+	/* Now recheck and fail if out of bounds */
+	if ((off >= dev_priv->fb_location &&
+	     off < (dev_priv->fb_location + dev_priv->fb_size)) ||
+	    (off >= dev_priv->gart_vm_start &&
+	     off < (dev_priv->gart_vm_start + dev_priv->gart_size))) {
+		DRM_DEBUG("offset fixed up to 0x%x\n", off);
+		*offset = off;
+		return 0;
+	}
+	return DRM_ERR(EINVAL);
 }
 
 static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
@@ -3012,6 +3043,9 @@
 	case RADEON_SETPARAM_PCIGART_LOCATION:
 		dev_priv->pcigart_offset = sp.value;
 		break;
+	case RADEON_SETPARAM_NEW_MEMMAP:
+		dev_priv->new_memmap = sp.value;
+		break;
 	default:
 		DRM_DEBUG("Invalid parameter %d\n", sp.param);
 		return DRM_ERR(EINVAL);