powerpc: Dynamic DMA zone limits

Platform code can call limit_zone_pfn() to set appropriate limits
for ZONE_DMA and ZONE_DMA32, and dma_direct_alloc_coherent() will
select a suitable zone based on a device's mask and the pfn limits that
platform code has configured.

Signed-off-by: Scott Wood <scottwood@freescale.com>
Cc: Shaohui Xie <Shaohui.Xie@freescale.com>
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index bd6dd6e..d003409 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -285,6 +285,10 @@
 	bool
 	default y if KPROBES || UPROBES || XMON || HAVE_HW_BREAKPOINT
 
+config ZONE_DMA32
+	bool
+	default y if PPC64
+
 source "init/Kconfig"
 
 source "kernel/Kconfig.freezer"
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index d98c1ec..6d74167 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -4,6 +4,7 @@
 
 #ifndef __ASSEMBLY__
 #include <linux/mmdebug.h>
+#include <linux/mmzone.h>
 #include <asm/processor.h>		/* For TASK_SIZE */
 #include <asm/mmu.h>
 #include <asm/page.h>
@@ -281,6 +282,8 @@
 
 extern pgd_t swapper_pg_dir[];
 
+void limit_zone_pfn(enum zone_type zone, unsigned long max_pfn);
+int dma_pfn_limit_to_zone(u64 pfn_limit);
 extern void paging_init(void);
 
 /*
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index ee78f6e..dfd99ef 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -40,6 +40,26 @@
 #else
 	struct page *page;
 	int node = dev_to_node(dev);
+	u64 pfn = (dev->coherent_dma_mask >> PAGE_SHIFT) + 1;
+	int zone;
+
+	zone = dma_pfn_limit_to_zone(pfn);
+	if (zone < 0) {
+		dev_err(dev, "%s: No suitable zone for pfn %#llx\n",
+			__func__, pfn);
+		return NULL;
+	}
+
+	switch (zone) {
+	case ZONE_DMA:
+		flag |= GFP_DMA;
+		break;
+#ifdef CONFIG_ZONE_DMA32
+	case ZONE_DMA32:
+		flag |= GFP_DMA32;
+		break;
+#endif
+	};
 
 	/* ignore region specifiers */
 	flag  &= ~(__GFP_HIGHMEM);
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 2c8e90f..687e7f7 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -260,6 +260,54 @@
 	return 0;
 }
 
+static bool zone_limits_final;
+
+static unsigned long max_zone_pfns[MAX_NR_ZONES] = {
+	[0 ... MAX_NR_ZONES - 1] = ~0UL
+};
+
+/*
+ * Restrict the specified zone and all more restrictive zones
+ * to be below the specified pfn.  May not be called after
+ * paging_init().
+ */
+void __init limit_zone_pfn(enum zone_type zone, unsigned long pfn_limit)
+{
+	int i;
+
+	if (WARN_ON(zone_limits_final))
+		return;
+
+	for (i = zone; i >= 0; i--) {
+		if (max_zone_pfns[i] > pfn_limit)
+			max_zone_pfns[i] = pfn_limit;
+	}
+}
+
+/*
+ * Find the least restrictive zone that is entirely below the
+ * specified pfn limit.  Returns < 0 if no suitable zone is found.
+ *
+ * pfn_limit must be u64 because it can exceed 32 bits even on 32-bit
+ * systems -- the DMA limit can be higher than any possible real pfn.
+ */
+int dma_pfn_limit_to_zone(u64 pfn_limit)
+{
+	enum zone_type top_zone = ZONE_NORMAL;
+	int i;
+
+#ifdef CONFIG_HIGHMEM
+	top_zone = ZONE_HIGHMEM;
+#endif
+
+	for (i = top_zone; i >= 0; i--) {
+		if (max_zone_pfns[i] <= pfn_limit)
+			return i;
+	}
+
+	return -EPERM;
+}
+
 /*
  * paging_init() sets up the page tables - in fact we've already done this.
  */
@@ -267,7 +315,7 @@
 {
 	unsigned long long total_ram = memblock_phys_mem_size();
 	phys_addr_t top_of_ram = memblock_end_of_DRAM();
-	unsigned long max_zone_pfns[MAX_NR_ZONES];
+	enum zone_type top_zone;
 
 #ifdef CONFIG_PPC32
 	unsigned long v = __fix_to_virt(__end_of_fixed_addresses - 1);
@@ -289,13 +337,16 @@
 	       (unsigned long long)top_of_ram, total_ram);
 	printk(KERN_DEBUG "Memory hole size: %ldMB\n",
 	       (long int)((top_of_ram - total_ram) >> 20));
-	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
+
 #ifdef CONFIG_HIGHMEM
-	max_zone_pfns[ZONE_DMA] = lowmem_end_addr >> PAGE_SHIFT;
-	max_zone_pfns[ZONE_HIGHMEM] = top_of_ram >> PAGE_SHIFT;
+	top_zone = ZONE_HIGHMEM;
+	limit_zone_pfn(ZONE_NORMAL, lowmem_end_addr >> PAGE_SHIFT);
 #else
-	max_zone_pfns[ZONE_DMA] = top_of_ram >> PAGE_SHIFT;
+	top_zone = ZONE_NORMAL;
 #endif
+
+	limit_zone_pfn(top_zone, top_of_ram >> PAGE_SHIFT);
+	zone_limits_final = true;
 	free_area_init_nodes(max_zone_pfns);
 
 	mark_nonram_nosave();