drm: add radeon PCI express support

Add support for Radeon PCI Express cards (needs a new X.org DDX).
Also allow the PCI GART table to be stored in VRAM for non-PCIE cards.

Signed-off-by: Dave Airlie <airlied@linux.ie>
diff --git a/drivers/char/drm/ati_pcigart.c b/drivers/char/drm/ati_pcigart.c
index 0aec5ef..957596c 100644
--- a/drivers/char/drm/ati_pcigart.c
+++ b/drivers/char/drm/ati_pcigart.c
@@ -91,9 +91,7 @@
 	free_pages( address, ATI_PCIGART_TABLE_ORDER );
 }
 
-int drm_ati_pcigart_cleanup( drm_device_t *dev,
-			      unsigned long addr,
-			      dma_addr_t bus_addr)
+int drm_ati_pcigart_cleanup(drm_device_t *dev, drm_ati_pcigart_info *gart_info)
 {
 	drm_sg_mem_t *entry = dev->sg;
 	unsigned long pages;
@@ -105,10 +103,12 @@
 		return 0;
 	}
 
-	if ( bus_addr ) {
-		pci_unmap_single(dev->pdev, bus_addr,
-				 ATI_PCIGART_TABLE_PAGES * PAGE_SIZE,
-				 PCI_DMA_TODEVICE);
+	if (gart_info->bus_addr) {
+		if (gart_info->gart_table_location==DRM_ATI_GART_MAIN) {
+			pci_unmap_single(dev->pdev, gart_info->bus_addr,
+					 ATI_PCIGART_TABLE_PAGES * PAGE_SIZE,
+					 PCI_DMA_TODEVICE);
+		}
 
 		pages = ( entry->pages <= ATI_MAX_PCIGART_PAGES )
 		        ? entry->pages : ATI_MAX_PCIGART_PAGES;
@@ -118,19 +118,21 @@
 			pci_unmap_single(dev->pdev, entry->busaddr[i],
 					 PAGE_SIZE, PCI_DMA_TODEVICE);
 		}
+		
+ 		if (gart_info->gart_table_location==DRM_ATI_GART_MAIN)
+			gart_info->bus_addr=0;
 	}
 
-	if ( addr ) {
-		drm_ati_free_pcigart_table( addr );
+ 	if (gart_info->gart_table_location==DRM_ATI_GART_MAIN && gart_info->addr) {
+		drm_ati_free_pcigart_table(gart_info->addr);
+		gart_info->addr=0;
 	}
 
 	return 1;
 }
 EXPORT_SYMBOL(drm_ati_pcigart_cleanup);
 
-int drm_ati_pcigart_init( drm_device_t *dev,
-			   unsigned long *addr,
-			   dma_addr_t *bus_addr)
+int drm_ati_pcigart_init(drm_device_t *dev, drm_ati_pcigart_info *gart_info)
 {
 	drm_sg_mem_t *entry = dev->sg;
 	unsigned long address = 0;
@@ -143,25 +145,36 @@
 		goto done;
 	}
 
-	address = drm_ati_alloc_pcigart_table();
-	if ( !address ) {
-		DRM_ERROR( "cannot allocate PCI GART page!\n" );
-		goto done;
-	}
+	if (gart_info->gart_table_location == DRM_ATI_GART_MAIN)
+	{
+		DRM_DEBUG("PCI: no table in VRAM: using normal RAM\n");
+		
+		address = drm_ati_alloc_pcigart_table();
+		if ( !address ) {
+			DRM_ERROR( "cannot allocate PCI GART page!\n" );
+			goto done;
+		}
+		
+		if ( !dev->pdev ) {
+			DRM_ERROR( "PCI device unknown!\n" );
+			goto done;
+		}
 
-	if ( !dev->pdev ) {
-		DRM_ERROR( "PCI device unknown!\n" );
-		goto done;
+		bus_address = pci_map_single(dev->pdev, (void *)address,
+					     ATI_PCIGART_TABLE_PAGES * PAGE_SIZE,
+					     PCI_DMA_TODEVICE);
+		if (bus_address == 0) {
+			DRM_ERROR( "unable to map PCIGART pages!\n" );
+			drm_ati_free_pcigart_table( address );
+			address = 0;
+			goto done;
+		}
 	}
-
-	bus_address = pci_map_single(dev->pdev, (void *)address,
-				  ATI_PCIGART_TABLE_PAGES * PAGE_SIZE,
-				  PCI_DMA_TODEVICE);
-	if (bus_address == 0) {
-		DRM_ERROR( "unable to map PCIGART pages!\n" );
-		drm_ati_free_pcigart_table( address );
-		address = 0;
-		goto done;
+	else {
+		/* Table is not in main RAM: the caller already filled in addr and bus_addr. */
+		address = gart_info->addr;
+		bus_address = gart_info->bus_addr;
+		DRM_DEBUG("PCI: Gart Table: VRAM %08X mapped at %08lX\n", (unsigned int)bus_address, address);
 	}
 
 	pci_gart = (u32 *)address;
@@ -179,7 +192,7 @@
 					   PCI_DMA_TODEVICE);
 		if (entry->busaddr[i] == 0) {
 			DRM_ERROR( "unable to map PCIGART pages!\n" );
-			drm_ati_pcigart_cleanup( dev, address, bus_address );
+			drm_ati_pcigart_cleanup(dev, gart_info);
 			address = 0;
 			bus_address = 0;
 			goto done;
@@ -187,7 +200,10 @@
 		page_base = (u32) entry->busaddr[i];
 
 		for (j = 0; j < (PAGE_SIZE / ATI_PCIGART_PAGE_SIZE); j++) {
-			*pci_gart++ = cpu_to_le32( page_base );
+			/* Advance pci_gart for every entry; PCIE entries hold (addr >> 8) | 0xc. */
+			*pci_gart++ = cpu_to_le32(gart_info->is_pcie ?
+						  ((page_base >> 8) | 0xc) :
+						  page_base);
 			page_base += ATI_PCIGART_PAGE_SIZE;
 		}
 	}
@@ -201,8 +217,8 @@
 #endif
 
 done:
-	*addr = address;
-	*bus_addr = bus_address;
+	gart_info->addr = address;
+ 	gart_info->bus_addr = bus_address;
 	return ret;
 }
 EXPORT_SYMBOL(drm_ati_pcigart_init);
diff --git a/drivers/char/drm/drmP.h b/drivers/char/drm/drmP.h
index 6f98701..c164c76 100644
--- a/drivers/char/drm/drmP.h
+++ b/drivers/char/drm/drmP.h
@@ -532,6 +532,17 @@
 } drm_vbl_sig_t;
 
 
+/* location of GART table */
+#define DRM_ATI_GART_MAIN 1	/* table in main RAM, allocated by drm_ati_pcigart_init() */
+#define DRM_ATI_GART_FB   2	/* table address supplied by the caller (radeon: in FB memory) */
+
+typedef struct ati_pcigart_info {
+	int gart_table_location;	/* DRM_ATI_GART_MAIN or DRM_ATI_GART_FB */
+	int is_pcie;			/* nonzero: table entries use the PCIE format */
+	unsigned long addr;		/* CPU address through which the table is written */
+	dma_addr_t bus_addr;		/* bus address of the table */
+} drm_ati_pcigart_info;
+
 /**
  * DRM driver structure. This structure represent the common code for
  * a family of cards. There will one drm_device for each card present
@@ -975,12 +986,8 @@
 				   unsigned int cmd, unsigned long arg);
 
                                /* ATI PCIGART support (ati_pcigart.h) */
-extern int            drm_ati_pcigart_init(drm_device_t *dev,
-					    unsigned long *addr,
-					    dma_addr_t *bus_addr);
-extern int            drm_ati_pcigart_cleanup(drm_device_t *dev,
-					       unsigned long addr,
-					       dma_addr_t bus_addr);
+extern int drm_ati_pcigart_init(drm_device_t * dev, drm_ati_pcigart_info *gart_info);
+extern int drm_ati_pcigart_cleanup(drm_device_t * dev, drm_ati_pcigart_info *gart_info);
 
 extern drm_dma_handle_t *drm_pci_alloc(drm_device_t *dev, size_t size,
 				       size_t align, dma_addr_t maxaddr);
@@ -1038,6 +1045,11 @@
 	return pci_find_capability(dev->pdev, PCI_CAP_ID_AGP);
 }
 
+static __inline__ int drm_device_is_pcie(drm_device_t *dev)
+{
+	return pci_find_capability(dev->pdev, PCI_CAP_ID_EXP);	/* result of the PCI Express capability lookup */
+}
+
 static __inline__ void drm_core_dropmap(struct drm_map *map)
 {
 }
diff --git a/drivers/char/drm/r128_cce.c b/drivers/char/drm/r128_cce.c
index 8951522..ac3ea2b 100644
--- a/drivers/char/drm/r128_cce.c
+++ b/drivers/char/drm/r128_cce.c
@@ -562,14 +562,16 @@
 #if __OS_HAS_AGP
 	if ( dev_priv->is_pci ) {
 #endif
-		if (!drm_ati_pcigart_init( dev, &dev_priv->phys_pci_gart,
-     					    &dev_priv->bus_pci_gart) ) {
+		dev_priv->gart_info.gart_table_location = DRM_ATI_GART_MAIN;
+		dev_priv->gart_info.addr = dev_priv->gart_info.bus_addr = 0;
+ 		dev_priv->gart_info.is_pcie = 0;
+		if (!drm_ati_pcigart_init(dev, &dev_priv->gart_info)) {
 			DRM_ERROR( "failed to init PCI GART!\n" );
 			dev->dev_private = (void *)dev_priv;
 			r128_do_cleanup_cce( dev );
 			return DRM_ERR(ENOMEM);
 		}
-		R128_WRITE( R128_PCI_GART_PAGE, dev_priv->bus_pci_gart );
+		R128_WRITE(R128_PCI_GART_PAGE, dev_priv->gart_info.bus_addr);
 #if __OS_HAS_AGP
 	}
 #endif
@@ -607,10 +609,10 @@
 		} else
 #endif
 		{
-			if (!drm_ati_pcigart_cleanup( dev,
-						dev_priv->phys_pci_gart,
-						dev_priv->bus_pci_gart ))
-				DRM_ERROR( "failed to cleanup PCI GART!\n" );
+ 			if (dev_priv->gart_info.bus_addr)
+				if (!drm_ati_pcigart_cleanup( dev,
+							      &dev_priv->gart_info))
+					DRM_ERROR( "failed to cleanup PCI GART!\n" );
 		}
 
 		drm_free( dev->dev_private, sizeof(drm_r128_private_t),
diff --git a/drivers/char/drm/r128_drv.h b/drivers/char/drm/r128_drv.h
index 0fb687c9..938dfae 100644
--- a/drivers/char/drm/r128_drv.h
+++ b/drivers/char/drm/r128_drv.h
@@ -88,8 +88,6 @@
 
 	int usec_timeout;
 	int is_pci;
-	unsigned long phys_pci_gart;
-	dma_addr_t bus_pci_gart;
 	unsigned long cce_buffers_offset;
 
 	atomic_t idle_count;
@@ -120,6 +118,7 @@
 	drm_local_map_t *cce_ring;
 	drm_local_map_t *ring_rptr;
 	drm_local_map_t *agp_textures;
+	drm_ati_pcigart_info gart_info;
 } drm_r128_private_t;
 
 typedef struct drm_r128_buf_priv {
diff --git a/drivers/char/drm/radeon_cp.c b/drivers/char/drm/radeon_cp.c
index 6d9080a..6dff5e4 100644
--- a/drivers/char/drm/radeon_cp.c
+++ b/drivers/char/drm/radeon_cp.c
@@ -825,6 +825,12 @@
 	return RADEON_READ(RADEON_CLOCK_CNTL_DATA);
 }
 
+static int RADEON_READ_PCIE(drm_radeon_private_t *dev_priv, int addr)
+{
+	RADEON_WRITE8(RADEON_PCIE_INDEX, addr & 0xff);	/* select the register: low 8 bits of addr go to the index reg */
+	return RADEON_READ(RADEON_PCIE_DATA);		/* then fetch its value through the data reg */
+}
+
 #if RADEON_FIFO_DEBUG
 static void radeon_status( drm_radeon_private_t *dev_priv )
 {
@@ -1241,17 +1247,46 @@
 		       RADEON_ISYNC_CPSCRATCH_IDLEGUI) );
 }
 
+/* Enable or disable PCI-E GART on the chip (on != 0 enables, on == 0 disables) */
+static void radeon_set_pciegart(drm_radeon_private_t * dev_priv, int on)
+{
+	u32 tmp = RADEON_READ_PCIE(dev_priv, RADEON_PCIE_TX_GART_CNTL);
+	if (on) {
+		/* Log the values, then program discard address, table base and the start/end range. */
+		DRM_DEBUG("programming pcie %08X %08lX %08X\n",
+			  dev_priv->gart_vm_start, (long)dev_priv->gart_info.bus_addr,
+			  dev_priv->gart_size);
+		RADEON_WRITE_PCIE(RADEON_PCIE_TX_DISCARD_RD_ADDR_LO, dev_priv->gart_vm_start);
+		RADEON_WRITE_PCIE(RADEON_PCIE_TX_GART_BASE, dev_priv->gart_info.bus_addr);
+		RADEON_WRITE_PCIE(RADEON_PCIE_TX_GART_START_LO, dev_priv->gart_vm_start);
+		RADEON_WRITE_PCIE(RADEON_PCIE_TX_GART_END_LO, dev_priv->gart_vm_start
+				  + dev_priv->gart_size - 1);
+		/* NOTE(review): the reason for this MC_AGP_LOCATION value is not evident here - confirm. */
+		RADEON_WRITE(RADEON_MC_AGP_LOCATION, 0xffffffc0);	/* ?? */
+		/* Writes only the enable bit: the previously read control value (tmp) is not merged in. */
+		RADEON_WRITE_PCIE(RADEON_PCIE_TX_GART_CNTL, RADEON_PCIE_TX_GART_EN);
+	} else {
+		RADEON_WRITE_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp & ~RADEON_PCIE_TX_GART_EN);
+	}
+}
+
 /* Enable or disable PCI GART on the chip */
 static void radeon_set_pcigart( drm_radeon_private_t *dev_priv, int on )
 {
 	u32 tmp	= RADEON_READ( RADEON_AIC_CNTL );
 
+	if (dev_priv->flags & CHIP_IS_PCIE)
+	{
+		radeon_set_pciegart(dev_priv, on);
+		return;
+	}
+
 	if ( on ) {
 		RADEON_WRITE( RADEON_AIC_CNTL, tmp | RADEON_PCIGART_TRANSLATE_EN );
 
 		/* set PCI GART page-table base address
 		 */
-		RADEON_WRITE( RADEON_AIC_PT_BASE, dev_priv->bus_pci_gart );
+		RADEON_WRITE(RADEON_AIC_PT_BASE, dev_priv->gart_info.bus_addr);
 
 		/* set address range for PCI address translate
 		 */
@@ -1519,8 +1554,28 @@
 	} else
 #endif
 	{
-		if (!drm_ati_pcigart_init( dev, &dev_priv->phys_pci_gart,
-					    &dev_priv->bus_pci_gart)) {
+		/* if we have an offset set from userspace */
+		if (dev_priv->pcigart_offset) {
+			dev_priv->gart_info.bus_addr = dev_priv->pcigart_offset + dev_priv->fb_location;
+			dev_priv->gart_info.addr = (unsigned long)drm_ioremap(dev_priv->gart_info.bus_addr, RADEON_PCIGART_TABLE_SIZE, dev);
+
+			dev_priv->gart_info.is_pcie = !!(dev_priv->flags & CHIP_IS_PCIE);
+			dev_priv->gart_info.gart_table_location = DRM_ATI_GART_FB;
+			
+			DRM_DEBUG("Setting phys_pci_gart to %08lX %08lX\n", dev_priv->gart_info.addr, dev_priv->pcigart_offset);
+		}
+		else {
+			dev_priv->gart_info.gart_table_location = DRM_ATI_GART_MAIN;
+			dev_priv->gart_info.addr = dev_priv->gart_info.bus_addr= 0;
+			if (dev_priv->flags & CHIP_IS_PCIE)
+			{
+				DRM_ERROR("Cannot use PCI Express without GART in FB memory\n");
+				radeon_do_cleanup_cp(dev);
+				return DRM_ERR(EINVAL);
+			}
+		}
+
+		if (!drm_ati_pcigart_init(dev, &dev_priv->gart_info)) {
 			DRM_ERROR( "failed to init PCI GART!\n" );
 			dev->dev_private = (void *)dev_priv;
 			radeon_do_cleanup_cp(dev);
@@ -1568,10 +1623,15 @@
 	} else
 #endif
 	{
-		if (!drm_ati_pcigart_cleanup( dev,
-					      dev_priv->phys_pci_gart,
-					      dev_priv->bus_pci_gart ))
-			DRM_ERROR( "failed to cleanup PCI GART!\n" );
+		if (dev_priv->gart_info.bus_addr)
+			if (!drm_ati_pcigart_cleanup(dev, &dev_priv->gart_info))
+				DRM_ERROR("failed to cleanup PCI GART!\n");
+		
+		if (dev_priv->gart_info.gart_table_location == DRM_ATI_GART_FB)
+		{
+			drm_ioremapfree((void *)dev_priv->gart_info.addr, RADEON_PCIGART_TABLE_SIZE, dev);
+			dev_priv->gart_info.addr = 0;
+		}
 	}
 	
 	/* only clear to the start of flags */
@@ -2057,6 +2117,9 @@
 	if (drm_device_is_agp(dev))
 		dev_priv->flags |= CHIP_IS_AGP;
 	
+	if (drm_device_is_pcie(dev))
+		dev_priv->flags |= CHIP_IS_PCIE;
+
 	DRM_DEBUG("%s card detected\n",
 		  ((dev_priv->flags & CHIP_IS_AGP) ? "AGP" : "PCI"));
 	return ret;
diff --git a/drivers/char/drm/radeon_drm.h b/drivers/char/drm/radeon_drm.h
index 67a8bf0..dd52661 100644
--- a/drivers/char/drm/radeon_drm.h
+++ b/drivers/char/drm/radeon_drm.h
@@ -698,6 +698,7 @@
 
 #define RADEON_SETPARAM_FB_LOCATION    1	/* determined framebuffer location */
 #define RADEON_SETPARAM_SWITCH_TILING  2	/* enable/disable color tiling */
+#define RADEON_SETPARAM_PCIGART_LOCATION 3      /* PCI Gart Location */
 
 /* 1.14: Clients can allocate/free a surface
  */
diff --git a/drivers/char/drm/radeon_drv.h b/drivers/char/drm/radeon_drv.h
index 430598e..9c10141 100644
--- a/drivers/char/drm/radeon_drv.h
+++ b/drivers/char/drm/radeon_drv.h
@@ -38,7 +38,7 @@
 
 #define DRIVER_NAME		"radeon"
 #define DRIVER_DESC		"ATI Radeon"
-#define DRIVER_DATE		"20050720"
+#define DRIVER_DATE		"20050911"
 
 /* Interface history:
  *
@@ -87,9 +87,10 @@
  *       R200_EMIT_PP_AFS_0/1, R200_EMIT_PP_TXCTLALL_0-5 (replaces
  *       R200_EMIT_PP_TXFILTER_0-5, 2 more regs) and R200_EMIT_ATF_TFACTOR
  *       (replaces R200_EMIT_TFACTOR_0 (8 consts instead of 6)
+ * 1.19- Add support for gart table in FB memory and PCIE r300
  */
 #define DRIVER_MAJOR		1
-#define DRIVER_MINOR		18
+#define DRIVER_MINOR		19
 #define DRIVER_PATCHLEVEL	0
 
 #define GET_RING_HEAD(dev_priv)		DRM_READ32(  (dev_priv)->ring_rptr, 0 )
@@ -134,6 +135,7 @@
 	CHIP_SINGLE_CRTC = 0x00040000UL,
 	CHIP_IS_AGP = 0x00080000UL,
 	CHIP_HAS_HIERZ = 0x00100000UL, 
+	CHIP_IS_PCIE = 0x00200000UL,
 };
 
 typedef struct drm_radeon_freelist {
@@ -213,8 +215,6 @@
 	int microcode_version;
 
 	int is_pci;
-	unsigned long phys_pci_gart;
-	dma_addr_t bus_pci_gart;
 
 	struct {
 		u32 boxes;
@@ -270,6 +270,9 @@
 	struct radeon_surface surfaces[RADEON_MAX_SURFACES];
 	struct radeon_virt_surface virt_surfaces[2*RADEON_MAX_SURFACES];
 
+ 	unsigned long pcigart_offset;
+ 	drm_ati_pcigart_info gart_info;
+
 	/* starting from here on, data is preserved accross an open */
 	uint32_t flags;		/* see radeon_chip_flags */
 } drm_radeon_private_t;
@@ -373,6 +376,25 @@
 #define RADEON_CRTC2_OFFSET		0x0324
 #define RADEON_CRTC2_OFFSET_CNTL	0x0328
 
+#define RADEON_PCIE_INDEX               0x0030
+#define RADEON_PCIE_DATA                0x0034
+#define RADEON_PCIE_TX_GART_CNTL	0x10
+#	define RADEON_PCIE_TX_GART_EN   	(1 << 0)
+#	define RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_PASS_THRU (0<<1)
+#	define RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_CLAMP_LO  (1<<1)
+#	define RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD   (3<<1)
+#	define RADEON_PCIE_TX_GART_MODE_32_128_CACHE	(0<<3)
+#	define RADEON_PCIE_TX_GART_MODE_8_4_128_CACHE	(1<<3)
+#	define RADEON_PCIE_TX_GART_CHK_RW_VALID_EN      (1<<5)
+#	define RADEON_PCIE_TX_GART_INVALIDATE_TLB	(1<<8)
+#define RADEON_PCIE_TX_DISCARD_RD_ADDR_LO 0x11
+#define RADEON_PCIE_TX_DISCARD_RD_ADDR_HI 0x12
+#define RADEON_PCIE_TX_GART_BASE  	0x13
+#define RADEON_PCIE_TX_GART_START_LO	0x14
+#define RADEON_PCIE_TX_GART_START_HI	0x15
+#define RADEON_PCIE_TX_GART_END_LO	0x16
+#define RADEON_PCIE_TX_GART_END_HI	0x17
+
 #define RADEON_MPP_TB_CONFIG		0x01c0
 #define RADEON_MEM_CNTL			0x0140
 #define RADEON_MEM_SDRAM_MODE_REG	0x0158
@@ -878,6 +900,8 @@
 
 #define RADEON_RING_HIGH_MARK		128
 
+#define RADEON_PCIGART_TABLE_SIZE      (32*1024)
+
 #define RADEON_READ(reg)	DRM_READ32(  dev_priv->mmio, (reg) )
 #define RADEON_WRITE(reg,val)	DRM_WRITE32( dev_priv->mmio, (reg), (val) )
 #define RADEON_READ8(reg)	DRM_READ8(  dev_priv->mmio, (reg) )
@@ -890,6 +914,13 @@
 	RADEON_WRITE( RADEON_CLOCK_CNTL_DATA, (val) );			\
 } while (0)
 
+#define RADEON_WRITE_PCIE( addr, val )					\
+do {	/* low 8 bits of addr go to the index reg, then val to the data reg */ \
+	RADEON_WRITE8( RADEON_PCIE_INDEX,				\
+			((addr) & 0xff));				\
+	RADEON_WRITE( RADEON_PCIE_DATA, (val) );			\
+} while (0)
+
 #define CP_PACKET0( reg, n )						\
 	(RADEON_CP_PACKET0 | ((n) << 16) | ((reg) >> 2))
 #define CP_PACKET0_TABLE( reg, n )					\
diff --git a/drivers/char/drm/radeon_state.c b/drivers/char/drm/radeon_state.c
index c4325f1..74c2fe8 100644
--- a/drivers/char/drm/radeon_state.c
+++ b/drivers/char/drm/radeon_state.c
@@ -3034,6 +3034,9 @@
 			dev_priv->sarea_priv->tiling_enabled = 1;
 		}
 		break;	
+	case RADEON_SETPARAM_PCIGART_LOCATION:
+		dev_priv->pcigart_offset = sp.value;
+		break;
 	default:
 		DRM_DEBUG( "Invalid parameter %d\n", sp.param );
 		return DRM_ERR( EINVAL );