drm/radeon: upgrade to 1.27 - make PCI GART more flexible

radeon: make PCI GART aperture size variable, but making table size variable
    This is precursor to getting a TTM backend for this stuff, and also
    allows the PCI table to be allocated at fb 0
radeon: add support for reverse engineered xpress200m

    The IGPGART setup code was traced using mmio-trace on fglrx by myself
    and Phillip Ezolt <phillipezolt@gmail.com> on dri-devel.

    This code doesn't let the 3D driver work properly as the card has no
    vertex shader support.

    Thanks to Matthew Garrett + Ubuntu for providing me some hardware to do this
    work on.

Signed-off-by: Dave Airlie <airlied@linux.ie>
diff --git a/drivers/char/drm/ati_pcigart.c b/drivers/char/drm/ati_pcigart.c
index bd7be09..5b91bc0 100644
--- a/drivers/char/drm/ati_pcigart.c
+++ b/drivers/char/drm/ati_pcigart.c
@@ -33,59 +33,44 @@
 
 #include "drmP.h"
 
-#if PAGE_SIZE == 65536
-# define ATI_PCIGART_TABLE_ORDER	0
-# define ATI_PCIGART_TABLE_PAGES	(1 << 0)
-#elif PAGE_SIZE == 16384
-# define ATI_PCIGART_TABLE_ORDER	1
-# define ATI_PCIGART_TABLE_PAGES	(1 << 1)
-#elif PAGE_SIZE == 8192
-# define ATI_PCIGART_TABLE_ORDER 	2
-# define ATI_PCIGART_TABLE_PAGES 	(1 << 2)
-#elif PAGE_SIZE == 4096
-# define ATI_PCIGART_TABLE_ORDER 	3
-# define ATI_PCIGART_TABLE_PAGES 	(1 << 3)
-#else
-# error - PAGE_SIZE not 64K, 16K, 8K or 4K
-#endif
-
-# define ATI_MAX_PCIGART_PAGES		8192	/**< 32 MB aperture, 4K pages */
 # define ATI_PCIGART_PAGE_SIZE		4096	/**< PCI GART page size */
 
-static void *drm_ati_alloc_pcigart_table(void)
+static void *drm_ati_alloc_pcigart_table(int order)
 {
 	unsigned long address;
 	struct page *page;
 	int i;
-	DRM_DEBUG("%s\n", __FUNCTION__);
+
+	DRM_DEBUG("%s: alloc %d order\n", __FUNCTION__, order);
 
 	address = __get_free_pages(GFP_KERNEL | __GFP_COMP,
-				   ATI_PCIGART_TABLE_ORDER);
+				   order);
 	if (address == 0UL) {
 		return NULL;
 	}
 
 	page = virt_to_page(address);
 
-	for (i = 0; i < ATI_PCIGART_TABLE_PAGES; i++, page++)
+	for (i = 0; i < order; i++, page++)
 		SetPageReserved(page);
 
 	DRM_DEBUG("%s: returning 0x%08lx\n", __FUNCTION__, address);
 	return (void *)address;
 }
 
-static void drm_ati_free_pcigart_table(void *address)
+static void drm_ati_free_pcigart_table(void *address, int order)
 {
 	struct page *page;
 	int i;
+	int num_pages = 1 << order;
 	DRM_DEBUG("%s\n", __FUNCTION__);
 
 	page = virt_to_page((unsigned long)address);
 
-	for (i = 0; i < ATI_PCIGART_TABLE_PAGES; i++, page++)
+	for (i = 0; i < num_pages; i++, page++)
 		ClearPageReserved(page);
 
-	free_pages((unsigned long)address, ATI_PCIGART_TABLE_ORDER);
+	free_pages((unsigned long)address, order);
 }
 
 int drm_ati_pcigart_cleanup(drm_device_t *dev, drm_ati_pcigart_info *gart_info)
@@ -93,6 +78,8 @@
 	drm_sg_mem_t *entry = dev->sg;
 	unsigned long pages;
 	int i;
+	int order;
+	int num_pages, max_pages;
 
 	/* we need to support large memory configurations */
 	if (!entry) {
@@ -100,15 +87,19 @@
 		return 0;
 	}
 
+	order = drm_order((gart_info->table_size + (PAGE_SIZE-1)) / PAGE_SIZE);
+	num_pages = 1 << order;
+
 	if (gart_info->bus_addr) {
 		if (gart_info->gart_table_location == DRM_ATI_GART_MAIN) {
 			pci_unmap_single(dev->pdev, gart_info->bus_addr,
-					 ATI_PCIGART_TABLE_PAGES * PAGE_SIZE,
+					 num_pages * PAGE_SIZE,
 					 PCI_DMA_TODEVICE);
 		}
 
-		pages = (entry->pages <= ATI_MAX_PCIGART_PAGES)
-		    ? entry->pages : ATI_MAX_PCIGART_PAGES;
+		max_pages = (gart_info->table_size / sizeof(u32));
+		pages = (entry->pages <= max_pages)
+		  ? entry->pages : max_pages;
 
 		for (i = 0; i < pages; i++) {
 			if (!entry->busaddr[i])
@@ -123,13 +114,12 @@
 
 	if (gart_info->gart_table_location == DRM_ATI_GART_MAIN
 	    && gart_info->addr) {
-		drm_ati_free_pcigart_table(gart_info->addr);
+		drm_ati_free_pcigart_table(gart_info->addr, order);
 		gart_info->addr = NULL;
 	}
 
 	return 1;
 }
-
 EXPORT_SYMBOL(drm_ati_pcigart_cleanup);
 
 int drm_ati_pcigart_init(drm_device_t *dev, drm_ati_pcigart_info *gart_info)
@@ -139,6 +129,9 @@
 	unsigned long pages;
 	u32 *pci_gart, page_base, bus_address = 0;
 	int i, j, ret = 0;
+	int order;
+	int max_pages;
+	int num_pages;
 
 	if (!entry) {
 		DRM_ERROR("no scatter/gather memory!\n");
@@ -148,7 +141,10 @@
 	if (gart_info->gart_table_location == DRM_ATI_GART_MAIN) {
 		DRM_DEBUG("PCI: no table in VRAM: using normal RAM\n");
 
-		address = drm_ati_alloc_pcigart_table();
+		order = drm_order((gart_info->table_size +
+				   (PAGE_SIZE-1)) / PAGE_SIZE);
+		num_pages = 1 << order;
+		address = drm_ati_alloc_pcigart_table(order);
 		if (!address) {
 			DRM_ERROR("cannot allocate PCI GART page!\n");
 			goto done;
@@ -160,11 +156,13 @@
 		}
 
 		bus_address = pci_map_single(dev->pdev, address,
-					     ATI_PCIGART_TABLE_PAGES *
-					     PAGE_SIZE, PCI_DMA_TODEVICE);
+					     num_pages * PAGE_SIZE,
+					     PCI_DMA_TODEVICE);
 		if (bus_address == 0) {
 			DRM_ERROR("unable to map PCIGART pages!\n");
-			drm_ati_free_pcigart_table(address);
+			order = drm_order((gart_info->table_size +
+					   (PAGE_SIZE-1)) / PAGE_SIZE);
+			drm_ati_free_pcigart_table(address, order);
 			address = NULL;
 			goto done;
 		}
@@ -177,10 +175,11 @@
 
 	pci_gart = (u32 *) address;
 
-	pages = (entry->pages <= ATI_MAX_PCIGART_PAGES)
-	    ? entry->pages : ATI_MAX_PCIGART_PAGES;
+	max_pages = (gart_info->table_size / sizeof(u32));
+	pages = (entry->pages <= max_pages)
+	    ? entry->pages : max_pages;
 
-	memset(pci_gart, 0, ATI_MAX_PCIGART_PAGES * sizeof(u32));
+	memset(pci_gart, 0, max_pages * sizeof(u32));
 
 	for (i = 0; i < pages; i++) {
 		/* we need to support large memory configurations */
@@ -198,10 +197,18 @@
 		page_base = (u32) entry->busaddr[i];
 
 		for (j = 0; j < (PAGE_SIZE / ATI_PCIGART_PAGE_SIZE); j++) {
-			if (gart_info->is_pcie)
+			switch(gart_info->gart_reg_if) {
+			case DRM_ATI_GART_IGP:
+				*pci_gart = cpu_to_le32((page_base) | 0xc);
+				break;
+			case DRM_ATI_GART_PCIE:
 				*pci_gart = cpu_to_le32((page_base >> 8) | 0xc);
-			else
+				break;
+			default:
+			case DRM_ATI_GART_PCI:
 				*pci_gart = cpu_to_le32(page_base);
+				break;
+			}
 			pci_gart++;
 			page_base += ATI_PCIGART_PAGE_SIZE;
 		}
@@ -220,5 +227,4 @@
 	gart_info->bus_addr = bus_address;
 	return ret;
 }
-
 EXPORT_SYMBOL(drm_ati_pcigart_init);
diff --git a/drivers/char/drm/drmP.h b/drivers/char/drm/drmP.h
index 80041d5..d494315 100644
--- a/drivers/char/drm/drmP.h
+++ b/drivers/char/drm/drmP.h
@@ -519,12 +519,17 @@
 #define DRM_ATI_GART_MAIN 1
 #define DRM_ATI_GART_FB   2
 
+#define DRM_ATI_GART_PCI 1
+#define DRM_ATI_GART_PCIE 2
+#define DRM_ATI_GART_IGP 3
+
 typedef struct ati_pcigart_info {
 	int gart_table_location;
-	int is_pcie;
+	int gart_reg_if;
 	void *addr;
 	dma_addr_t bus_addr;
 	drm_local_map_t mapping;
+	int table_size;
 } drm_ati_pcigart_info;
 
 /*
diff --git a/drivers/char/drm/drm_pciids.h b/drivers/char/drm/drm_pciids.h
index 01cf482..31cdde8 100644
--- a/drivers/char/drm/drm_pciids.h
+++ b/drivers/char/drm/drm_pciids.h
@@ -102,6 +102,7 @@
 	{0x1002, 0x5653, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV410|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x5834, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS300|RADEON_IS_IGP}, \
 	{0x1002, 0x5835, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS300|RADEON_IS_IGP|RADEON_IS_MOBILITY}, \
+	{0x1002, 0x5955, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS400|RADEON_IS_IGP|RADEON_IS_MOBILITY|RADEON_IS_IGPGART}, \
 	{0x1002, 0x5960, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV280}, \
 	{0x1002, 0x5961, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV280}, \
 	{0x1002, 0x5962, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV280}, \
diff --git a/drivers/char/drm/r128_cce.c b/drivers/char/drm/r128_cce.c
index db5a604..1014602 100644
--- a/drivers/char/drm/r128_cce.c
+++ b/drivers/char/drm/r128_cce.c
@@ -560,9 +560,10 @@
 	if (dev_priv->is_pci) {
 #endif
 		dev_priv->gart_info.gart_table_location = DRM_ATI_GART_MAIN;
+		dev_priv->gart_info.table_size = R128_PCIGART_TABLE_SIZE;
 		dev_priv->gart_info.addr = NULL;
 		dev_priv->gart_info.bus_addr = 0;
-		dev_priv->gart_info.is_pcie = 0;
+		dev_priv->gart_info.gart_reg_if = DRM_ATI_GART_PCI;
 		if (!drm_ati_pcigart_init(dev, &dev_priv->gart_info)) {
 			DRM_ERROR("failed to init PCI GART!\n");
 			dev->dev_private = (void *)dev_priv;
diff --git a/drivers/char/drm/r128_drv.h b/drivers/char/drm/r128_drv.h
index f1efb49..9086835 100644
--- a/drivers/char/drm/r128_drv.h
+++ b/drivers/char/drm/r128_drv.h
@@ -383,6 +383,8 @@
 
 #define R128_PERFORMANCE_BOXES		0
 
+#define R128_PCIGART_TABLE_SIZE         32768
+
 #define R128_READ(reg)		DRM_READ32(  dev_priv->mmio, (reg) )
 #define R128_WRITE(reg,val)	DRM_WRITE32( dev_priv->mmio, (reg), (val) )
 #define R128_READ8(reg)		DRM_READ8(   dev_priv->mmio, (reg) )
diff --git a/drivers/char/drm/radeon_cp.c b/drivers/char/drm/radeon_cp.c
index c1850ec..68338389 100644
--- a/drivers/char/drm/radeon_cp.c
+++ b/drivers/char/drm/radeon_cp.c
@@ -830,6 +830,15 @@
 	return RADEON_READ(RADEON_PCIE_DATA);
 }
 
+static u32 RADEON_READ_IGPGART(drm_radeon_private_t *dev_priv, int addr)
+{
+	u32 ret;
+	RADEON_WRITE(RADEON_IGPGART_INDEX, addr & 0x7f);
+	ret = RADEON_READ(RADEON_IGPGART_DATA);
+	RADEON_WRITE(RADEON_IGPGART_INDEX, 0x7f);
+	return ret;
+}
+
 #if RADEON_FIFO_DEBUG
 static void radeon_status(drm_radeon_private_t * dev_priv)
 {
@@ -1267,7 +1276,44 @@
 	}
 }
 
-/* Enable or disable PCI-E GART on the chip */
+/* Enable or disable IGP GART on the chip */
+static void radeon_set_igpgart(drm_radeon_private_t * dev_priv, int on)
+{
+	u32 temp, tmp;
+
+	tmp = RADEON_READ(RADEON_AIC_CNTL);
+	if (on) {
+		DRM_DEBUG("programming igpgart %08X %08lX %08X\n",
+			 dev_priv->gart_vm_start,
+			 (long)dev_priv->gart_info.bus_addr,
+			 dev_priv->gart_size);
+
+		RADEON_WRITE_IGPGART(RADEON_IGPGART_UNK_18, 0x1000);
+		RADEON_WRITE_IGPGART(RADEON_IGPGART_ENABLE, 0x1);
+		RADEON_WRITE_IGPGART(RADEON_IGPGART_CTRL, 0x42040800);
+		RADEON_WRITE_IGPGART(RADEON_IGPGART_BASE_ADDR,
+				     dev_priv->gart_info.bus_addr);
+
+		temp = RADEON_READ_IGPGART(dev_priv, RADEON_IGPGART_UNK_39);
+		RADEON_WRITE_IGPGART(RADEON_IGPGART_UNK_39, temp);
+
+		RADEON_WRITE(RADEON_AGP_BASE, (unsigned int)dev_priv->gart_vm_start);
+		dev_priv->gart_size = 32*1024*1024;
+		RADEON_WRITE(RADEON_MC_AGP_LOCATION,
+			     (((dev_priv->gart_vm_start - 1 +
+			       dev_priv->gart_size) & 0xffff0000) |
+			     (dev_priv->gart_vm_start >> 16)));
+
+		temp = RADEON_READ_IGPGART(dev_priv, RADEON_IGPGART_ENABLE);
+		RADEON_WRITE_IGPGART(RADEON_IGPGART_ENABLE, temp);
+
+		RADEON_READ_IGPGART(dev_priv, RADEON_IGPGART_FLUSH);
+		RADEON_WRITE_IGPGART(RADEON_IGPGART_FLUSH, 0x1);
+		RADEON_READ_IGPGART(dev_priv, RADEON_IGPGART_FLUSH);
+		RADEON_WRITE_IGPGART(RADEON_IGPGART_FLUSH, 0x0);
+       }
+}
+
 static void radeon_set_pciegart(drm_radeon_private_t * dev_priv, int on)
 {
 	u32 tmp = RADEON_READ_PCIE(dev_priv, RADEON_PCIE_TX_GART_CNTL);
@@ -1302,6 +1348,11 @@
 {
 	u32 tmp;
 
+	if (dev_priv->flags & RADEON_IS_IGPGART) {
+		radeon_set_igpgart(dev_priv, on);
+		return;
+	}
+
 	if (dev_priv->flags & RADEON_IS_PCIE) {
 		radeon_set_pciegart(dev_priv, on);
 		return;
@@ -1620,20 +1671,22 @@
 #endif
 	{
 		/* if we have an offset set from userspace */
-		if (dev_priv->pcigart_offset) {
+		if (dev_priv->pcigart_offset_set) {
 			dev_priv->gart_info.bus_addr =
 			    dev_priv->pcigart_offset + dev_priv->fb_location;
 			dev_priv->gart_info.mapping.offset =
 			    dev_priv->gart_info.bus_addr;
 			dev_priv->gart_info.mapping.size =
-			    RADEON_PCIGART_TABLE_SIZE;
+			    dev_priv->gart_info.table_size;
 
 			drm_core_ioremap(&dev_priv->gart_info.mapping, dev);
 			dev_priv->gart_info.addr =
 			    dev_priv->gart_info.mapping.handle;
 
-			dev_priv->gart_info.is_pcie =
-			    !!(dev_priv->flags & RADEON_IS_PCIE);
+			if (dev_priv->flags & RADEON_IS_PCIE)
+				dev_priv->gart_info.gart_reg_if = DRM_ATI_GART_PCIE;
+			else
+				dev_priv->gart_info.gart_reg_if = DRM_ATI_GART_PCI;
 			dev_priv->gart_info.gart_table_location =
 			    DRM_ATI_GART_FB;
 
@@ -1641,6 +1694,10 @@
 				  dev_priv->gart_info.addr,
 				  dev_priv->pcigart_offset);
 		} else {
+			if (dev_priv->flags & RADEON_IS_IGPGART)
+				dev_priv->gart_info.gart_reg_if = DRM_ATI_GART_IGP;
+			else
+				dev_priv->gart_info.gart_reg_if = DRM_ATI_GART_PCI;
 			dev_priv->gart_info.gart_table_location =
 			    DRM_ATI_GART_MAIN;
 			dev_priv->gart_info.addr = NULL;
@@ -1714,7 +1771,7 @@
 		if (dev_priv->gart_info.gart_table_location == DRM_ATI_GART_FB)
 		{
 			drm_core_ioremapfree(&dev_priv->gart_info.mapping, dev);
-			dev_priv->gart_info.addr = NULL;
+			dev_priv->gart_info.addr = 0;
 		}
 	}
 	/* only clear to the start of flags */
@@ -2222,6 +2279,8 @@
 	drm_local_map_t *map;
 	drm_radeon_private_t *dev_priv = dev->dev_private;
 
+	dev_priv->gart_info.table_size = RADEON_PCIGART_TABLE_SIZE;
+
 	ret = drm_addmap(dev, drm_get_resource_start(dev, 2),
 			 drm_get_resource_len(dev, 2), _DRM_REGISTERS,
 			 _DRM_READ_ONLY, &dev_priv->mmio);
diff --git a/drivers/char/drm/radeon_drm.h b/drivers/char/drm/radeon_drm.h
index 8d6350d..66c4b6f 100644
--- a/drivers/char/drm/radeon_drm.h
+++ b/drivers/char/drm/radeon_drm.h
@@ -707,6 +707,7 @@
 #define RADEON_SETPARAM_SWITCH_TILING  2	/* enable/disable color tiling */
 #define RADEON_SETPARAM_PCIGART_LOCATION 3	/* PCI Gart Location */
 #define RADEON_SETPARAM_NEW_MEMMAP 4		/* Use new memory map */
+#define RADEON_SETPARAM_PCIGART_TABLE_SIZE 5    /* PCI GART Table Size */
 
 /* 1.14: Clients can allocate/free a surface
  */
diff --git a/drivers/char/drm/radeon_drv.h b/drivers/char/drm/radeon_drv.h
index 8b105f1..97c27da 100644
--- a/drivers/char/drm/radeon_drv.h
+++ b/drivers/char/drm/radeon_drv.h
@@ -95,9 +95,11 @@
  * 1.24- Add general-purpose packet for manipulating scratch registers (r300)
  * 1.25- Add support for r200 vertex programs (R200_EMIT_VAP_PVS_CNTL,
  *       new packet type)
+ * 1.26- Add support for variable size PCI(E) gart aperture
+ * 1.27- Add support for IGP GART
  */
 #define DRIVER_MAJOR		1
-#define DRIVER_MINOR		25
+#define DRIVER_MINOR		27
 #define DRIVER_PATCHLEVEL	0
 
 /*
@@ -143,6 +145,7 @@
 	RADEON_IS_PCIE = 0x00200000UL,
 	RADEON_NEW_MEMMAP = 0x00400000UL,
 	RADEON_IS_PCI = 0x00800000UL,
+	RADEON_IS_IGPGART = 0x01000000UL,
 };
 
 #define GET_RING_HEAD(dev_priv)	(dev_priv->writeback_works ? \
@@ -280,6 +283,7 @@
 	struct radeon_virt_surface virt_surfaces[2 * RADEON_MAX_SURFACES];
 
 	unsigned long pcigart_offset;
+	unsigned int pcigart_offset_set;
 	drm_ati_pcigart_info gart_info;
 
 	u32 scratch_ages[5];
@@ -432,6 +436,15 @@
 #define RADEON_PCIE_TX_GART_END_LO	0x16
 #define RADEON_PCIE_TX_GART_END_HI	0x17
 
+#define RADEON_IGPGART_INDEX            0x168
+#define RADEON_IGPGART_DATA             0x16c
+#define RADEON_IGPGART_UNK_18           0x18
+#define RADEON_IGPGART_CTRL             0x2b
+#define RADEON_IGPGART_BASE_ADDR        0x2c
+#define RADEON_IGPGART_FLUSH            0x2e
+#define RADEON_IGPGART_ENABLE           0x38
+#define RADEON_IGPGART_UNK_39           0x39
+
 #define RADEON_MPP_TB_CONFIG		0x01c0
 #define RADEON_MEM_CNTL			0x0140
 #define RADEON_MEM_SDRAM_MODE_REG	0x0158
@@ -964,6 +977,14 @@
 	RADEON_WRITE( RADEON_CLOCK_CNTL_DATA, (val) );			\
 } while (0)
 
+#define RADEON_WRITE_IGPGART( addr, val )				\
+do {									\
+	RADEON_WRITE( RADEON_IGPGART_INDEX,				\
+			((addr) & 0x7f) | (1 << 8));			\
+	RADEON_WRITE( RADEON_IGPGART_DATA, (val) );			\
+	RADEON_WRITE( RADEON_IGPGART_INDEX, 0x7f );			\
+} while (0)
+
 #define RADEON_WRITE_PCIE( addr, val )					\
 do {									\
 	RADEON_WRITE8( RADEON_PCIE_INDEX,				\
diff --git a/drivers/char/drm/radeon_state.c b/drivers/char/drm/radeon_state.c
index 938eccb..d604191 100644
--- a/drivers/char/drm/radeon_state.c
+++ b/drivers/char/drm/radeon_state.c
@@ -3145,10 +3145,16 @@
 		break;
 	case RADEON_SETPARAM_PCIGART_LOCATION:
 		dev_priv->pcigart_offset = sp.value;
+		dev_priv->pcigart_offset_set = 1;
 		break;
 	case RADEON_SETPARAM_NEW_MEMMAP:
 		dev_priv->new_memmap = sp.value;
 		break;
+	case RADEON_SETPARAM_PCIGART_TABLE_SIZE:
+		dev_priv->gart_info.table_size = sp.value;
+		if (dev_priv->gart_info.table_size < RADEON_PCIGART_TABLE_SIZE)
+			dev_priv->gart_info.table_size = RADEON_PCIGART_TABLE_SIZE;
+		break;
 	default:
 		DRM_DEBUG("Invalid parameter %d\n", sp.param);
 		return DRM_ERR(EINVAL);