[POWERPC] Rewrite IO allocation & mapping on powerpc64

This rewrites pretty much from scratch the handling of MMIO and PIO
space allocations on powerpc64.  The main goals are:

 - Get rid of imalloc and use more common code where possible
 - Simplify the current mess so that PIO space is allocated and
   mapped in a single place for PCI bridges
 - Handle allocation constraints of PIO for all bridges including
   hot plugged ones within the 2GB space reserved for IO ports,
   so that devices on hotplugged busses will now work with drivers
   that assume IO ports fit in an int.
 - Cleanup and separate tracking of the ISA space in the reserved
   low 64K of IO space. No ISA -> Nothing mapped there.

I booted a cell blade with IDE on PIO and MMIO and a dual G5 so
far, that's it :-)

With this patch, all allocations are done using the code in
mm/vmalloc.c, though we use the low level __get_vm_area with
explicit start/stop constraints in order to manage separate
areas for vmalloc/vmap, ioremap, and PCI IOs.

This greatly simplifies a lot of things, as you can see in the
diffstat of that patch :-)

A new pair of functions pcibios_map/unmap_io_space() now replace
all of the previous code that used to manipulate PCI IOs space.
The allocation is done at mapping time, which is now called from
scan_phb's, just before the devices are probed (instead of after,
which is by itself a bug fix). The only other caller is the PCI
hotplug code for hot adding PCI-PCI bridges (slots).

imalloc is gone, as is the "sub-allocation" thing, but I do beleive
that hotplug should still work in the sense that the space allocation
is always done by the PHB, but if you unmap a child bus of this PHB
(which seems to be possible), then the code should properly tear
down all the HPTE mappings for that area of the PHB allocated IO space.

I now always reserve the first 64K of IO space for the bridge with
the ISA bus on it. I have moved the code for tracking ISA in a separate
file which should also make it smarter if we ever are capable of
hot unplugging or re-plugging an ISA bridge.

This should have a side effect on platforms like powermac where VGA IOs
will no longer work. This is done on purpose though as they would have
worked semi-randomly before. The idea at this point is to isolate drivers
that might need to access those and fix them by providing a proper
function to obtain an offset to the legacy IOs of a given bus.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index fa5c828..a895de7 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -34,41 +34,27 @@
 #include <linux/stddef.h>
 #include <linux/vmalloc.h>
 #include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/bootmem.h>
-#include <linux/highmem.h>
-#include <linux/idr.h>
-#include <linux/nodemask.h>
-#include <linux/module.h>
 
 #include <asm/pgalloc.h>
 #include <asm/page.h>
 #include <asm/prom.h>
-#include <asm/lmb.h>
-#include <asm/rtas.h>
 #include <asm/io.h>
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
 #include <asm/mmu.h>
-#include <asm/uaccess.h>
 #include <asm/smp.h>
 #include <asm/machdep.h>
 #include <asm/tlb.h>
-#include <asm/eeh.h>
 #include <asm/processor.h>
-#include <asm/mmzone.h>
 #include <asm/cputable.h>
 #include <asm/sections.h>
 #include <asm/system.h>
-#include <asm/iommu.h>
 #include <asm/abs_addr.h>
-#include <asm/vdso.h>
 #include <asm/firmware.h>
 
 #include "mmu_decl.h"
 
-unsigned long ioremap_bot = IMALLOC_BASE;
-static unsigned long phbs_io_bot = PHBS_IO_BASE;
+unsigned long ioremap_bot = IOREMAP_BASE;
 
 /*
  * map_io_page currently only called by __ioremap
@@ -102,8 +88,8 @@
 		 * entry in the hardware page table.
 		 *
 		 */
-		if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, flags,
-				      mmu_io_psize)) {
+		if (htab_bolt_mapping(ea, (unsigned long)ea + PAGE_SIZE,
+				      pa, flags, mmu_io_psize)) {
 			printk(KERN_ERR "Failed to do bolted mapping IO "
 			       "memory at %016lx !\n", pa);
 			return -ENOMEM;
@@ -113,8 +99,11 @@
 }
 
 
-static void __iomem * __ioremap_com(phys_addr_t addr, unsigned long pa,
-			    unsigned long ea, unsigned long size,
+/**
+ * __ioremap_at - Low level function to establish the page tables
+ *                for an IO mapping
+ */
+void __iomem * __ioremap_at(phys_addr_t pa, void *ea, unsigned long size,
 			    unsigned long flags)
 {
 	unsigned long i;
@@ -122,17 +111,35 @@
 	if ((flags & _PAGE_PRESENT) == 0)
 		flags |= pgprot_val(PAGE_KERNEL);
 
+	WARN_ON(pa & ~PAGE_MASK);
+	WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
+	WARN_ON(size & ~PAGE_MASK);
+
 	for (i = 0; i < size; i += PAGE_SIZE)
-		if (map_io_page(ea+i, pa+i, flags))
+		if (map_io_page((unsigned long)ea+i, pa+i, flags))
 			return NULL;
 
-	return (void __iomem *) (ea + (addr & ~PAGE_MASK));
+	return (void __iomem *)ea;
+}
+
+/**
+ * __iounmap_from - Low level function to tear down the page tables
+ *                  for an IO mapping. This is used for mappings that
+ *                  are manipulated manually, like partial unmapping of
+ *                  PCI IOs or ISA space.
+ */
+void __iounmap_at(void *ea, unsigned long size)
+{
+	WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
+	WARN_ON(size & ~PAGE_MASK);
+
+	unmap_kernel_range((unsigned long)ea, size);
 }
 
 void __iomem * __ioremap(phys_addr_t addr, unsigned long size,
 			 unsigned long flags)
 {
-	unsigned long pa, ea;
+	phys_addr_t paligned;
 	void __iomem *ret;
 
 	/*
@@ -144,27 +151,30 @@
 	 * IMALLOC_END
 	 * 
 	 */
-	pa = addr & PAGE_MASK;
-	size = PAGE_ALIGN(addr + size) - pa;
+	paligned = addr & PAGE_MASK;
+	size = PAGE_ALIGN(addr + size) - paligned;
 
-	if ((size == 0) || (pa == 0))
+	if ((size == 0) || (paligned == 0))
 		return NULL;
 
 	if (mem_init_done) {
 		struct vm_struct *area;
-		area = im_get_free_area(size);
+
+		area = __get_vm_area(size, VM_IOREMAP,
+				     ioremap_bot, IOREMAP_END);
 		if (area == NULL)
 			return NULL;
-		ea = (unsigned long)(area->addr);
-		ret = __ioremap_com(addr, pa, ea, size, flags);
+		ret = __ioremap_at(paligned, area->addr, size, flags);
 		if (!ret)
-			im_free(area->addr);
+			vunmap(area->addr);
 	} else {
-		ea = ioremap_bot;
-		ret = __ioremap_com(addr, pa, ea, size, flags);
+		ret = __ioremap_at(paligned, (void *)ioremap_bot, size, flags);
 		if (ret)
 			ioremap_bot += size;
 	}
+
+	if (ret)
+		ret += addr & ~PAGE_MASK;
 	return ret;
 }
 
@@ -187,61 +197,9 @@
 }
 
 
-#define IS_PAGE_ALIGNED(_val) ((_val) == ((_val) & PAGE_MASK))
-
-int __ioremap_explicit(phys_addr_t pa, unsigned long ea,
-		       unsigned long size, unsigned long flags)
-{
-	struct vm_struct *area;
-	void __iomem *ret;
-	
-	/* For now, require page-aligned values for pa, ea, and size */
-	if (!IS_PAGE_ALIGNED(pa) || !IS_PAGE_ALIGNED(ea) ||
-	    !IS_PAGE_ALIGNED(size)) {
-		printk(KERN_ERR	"unaligned value in %s\n", __FUNCTION__);
-		return 1;
-	}
-	
-	if (!mem_init_done) {
-		/* Two things to consider in this case:
-		 * 1) No records will be kept (imalloc, etc) that the region
-		 *    has been remapped
-		 * 2) It won't be easy to iounmap() the region later (because
-		 *    of 1)
-		 */
-		;
-	} else {
-		area = im_get_area(ea, size,
-			IM_REGION_UNUSED|IM_REGION_SUBSET|IM_REGION_EXISTS);
-		if (area == NULL) {
-			/* Expected when PHB-dlpar is in play */
-			return 1;
-		}
-		if (ea != (unsigned long) area->addr) {
-			printk(KERN_ERR "unexpected addr return from "
-			       "im_get_area\n");
-			return 1;
-		}
-	}
-	
-	ret = __ioremap_com(pa, pa, ea, size, flags);
-	if (ret == NULL) {
-		printk(KERN_ERR "ioremap_explicit() allocation failure !\n");
-		return 1;
-	}
-	if (ret != (void *) ea) {
-		printk(KERN_ERR "__ioremap_com() returned unexpected addr\n");
-		return 1;
-	}
-
-	return 0;
-}
-
 /*  
  * Unmap an IO region and remove it from imalloc'd list.
  * Access to IO memory should be serialized by driver.
- *
- * XXX	what about calls before mem_init_done (ie python_countermeasures())
  */
 void __iounmap(volatile void __iomem *token)
 {
@@ -250,9 +208,14 @@
 	if (!mem_init_done)
 		return;
 	
-	addr = (void *) ((unsigned long __force) token & PAGE_MASK);
-
-	im_free(addr);
+	addr = (void *) ((unsigned long __force)
+			 PCI_FIX_ADDR(token) & PAGE_MASK);
+	if ((unsigned long)addr < ioremap_bot) {
+		printk(KERN_WARNING "Attempt to iounmap early bolted mapping"
+		       " at 0x%p\n", addr);
+		return;
+	}
+	vunmap(addr);
 }
 
 void iounmap(volatile void __iomem *token)
@@ -263,77 +226,8 @@
 		__iounmap(token);
 }
 
-static int iounmap_subset_regions(unsigned long addr, unsigned long size)
-{
-	struct vm_struct *area;
-
-	/* Check whether subsets of this region exist */
-	area = im_get_area(addr, size, IM_REGION_SUPERSET);
-	if (area == NULL)
-		return 1;
-
-	while (area) {
-		iounmap((void __iomem *) area->addr);
-		area = im_get_area(addr, size,
-				IM_REGION_SUPERSET);
-	}
-
-	return 0;
-}
-
-int __iounmap_explicit(volatile void __iomem *start, unsigned long size)
-{
-	struct vm_struct *area;
-	unsigned long addr;
-	int rc;
-	
-	addr = (unsigned long __force) start & PAGE_MASK;
-
-	/* Verify that the region either exists or is a subset of an existing
-	 * region.  In the latter case, split the parent region to create 
-	 * the exact region 
-	 */
-	area = im_get_area(addr, size, 
-			    IM_REGION_EXISTS | IM_REGION_SUBSET);
-	if (area == NULL) {
-		/* Determine whether subset regions exist.  If so, unmap */
-		rc = iounmap_subset_regions(addr, size);
-		if (rc) {
-			printk(KERN_ERR
-			       "%s() cannot unmap nonexistent range 0x%lx\n",
- 				__FUNCTION__, addr);
-			return 1;
-		}
-	} else {
-		iounmap((void __iomem *) area->addr);
-	}
-	/*
-	 * FIXME! This can't be right:
-	iounmap(area->addr);
-	 * Maybe it should be "iounmap(area);"
-	 */
-	return 0;
-}
-
 EXPORT_SYMBOL(ioremap);
 EXPORT_SYMBOL(ioremap_flags);
 EXPORT_SYMBOL(__ioremap);
 EXPORT_SYMBOL(iounmap);
 EXPORT_SYMBOL(__iounmap);
-
-static DEFINE_SPINLOCK(phb_io_lock);
-
-void __iomem * reserve_phb_iospace(unsigned long size)
-{
-	void __iomem *virt_addr;
-		
-	if (phbs_io_bot >= IMALLOC_BASE) 
-		panic("reserve_phb_iospace(): phb io space overflow\n");
-			
-	spin_lock(&phb_io_lock);
-	virt_addr = (void __iomem *) phbs_io_bot;
-	phbs_io_bot += size;
-	spin_unlock(&phb_io_lock);
-
-	return virt_addr;
-}