microblaze: Fix DMA-consistent memory code

This patch fixes the DMA-consistent memory code, which had problems in the
consistent_free() function; it now takes the allocation size so the pages
can be unmapped and freed correctly.
I am not sure whether we need to call flush_tlb_all() afterwards, but it
keeps the TLBs in sync.
I also merged the noMMU and MMU versions into a single implementation.
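
For reference, a minimal usage sketch of the changed interface; the buffer
size must now be carried from the allocation path to the free path. The
surrounding driver code here is illustrative only, not part of this patch:

	size_t size = PAGE_ALIGN(len);
	dma_addr_t handle;
	void *buf;

	buf = consistent_alloc(GFP_KERNEL, size, &handle);
	if (buf) {
		/* ... hand 'handle' to the device, use 'buf' ... */
		consistent_free(size, buf);	/* was: consistent_free(buf) */
	}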

The uncached shadow feature is not tested.

Signed-off-by: Michal Simek <monstr@monstr.eu>
diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h
index 16a117e..ca2d928 100644
--- a/arch/microblaze/include/asm/pgtable.h
+++ b/arch/microblaze/include/asm/pgtable.h
@@ -568,7 +568,7 @@
 extern unsigned long ioremap_bot, ioremap_base;
 
 void *consistent_alloc(int gfp, size_t size, dma_addr_t *dma_handle);
-void consistent_free(void *vaddr);
+void consistent_free(size_t size, void *vaddr);
 void consistent_sync(void *vaddr, size_t size, int direction);
 void consistent_sync_page(struct page *page, unsigned long offset,
 	size_t size, int direction);
diff --git a/arch/microblaze/kernel/dma.c b/arch/microblaze/kernel/dma.c
index ce72dd4..9dcd90b 100644
--- a/arch/microblaze/kernel/dma.c
+++ b/arch/microblaze/kernel/dma.c
@@ -74,7 +74,7 @@
 			      void *vaddr, dma_addr_t dma_handle)
 {
 #ifdef NOT_COHERENT_CACHE
-	consistent_free(vaddr);
+	consistent_free(size, vaddr);
 #else
 	free_pages((unsigned long)vaddr, get_order(size));
 #endif
diff --git a/arch/microblaze/mm/consistent.c b/arch/microblaze/mm/consistent.c
index f956e24..713431c 100644
--- a/arch/microblaze/mm/consistent.c
+++ b/arch/microblaze/mm/consistent.c
@@ -42,11 +42,12 @@
 #include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/cpuinfo.h>
+#include <asm/tlbflush.h>
 
 #ifndef CONFIG_MMU
-
 /* I have to use dcache values because I can't relate on ram size */
-#define UNCACHED_SHADOW_MASK (cpuinfo.dcache_high - cpuinfo.dcache_base + 1)
+# define UNCACHED_SHADOW_MASK (cpuinfo.dcache_high - cpuinfo.dcache_base + 1)
+#endif
 
 /*
  * Consistent memory allocators. Used for DMA devices that want to
@@ -60,71 +61,16 @@
  */
 void *consistent_alloc(int gfp, size_t size, dma_addr_t *dma_handle)
 {
-	struct page *page, *end, *free;
-	unsigned long order;
-	void *ret, *virt;
+	unsigned long order, vaddr;
+	void *ret;
+	unsigned int i, err = 0;
+	struct page *page, *end;
 
-	if (in_interrupt())
-		BUG();
-
-	size = PAGE_ALIGN(size);
-	order = get_order(size);
-
-	page = alloc_pages(gfp, order);
-	if (!page)
-		goto no_page;
-
-	/* We could do with a page_to_phys and page_to_bus here. */
-	virt = page_address(page);
-	ret = ioremap(virt_to_phys(virt), size);
-	if (!ret)
-		goto no_remap;
-
-	/*
-	 * Here's the magic!  Note if the uncached shadow is not implemented,
-	 * it's up to the calling code to also test that condition and make
-	 * other arranegments, such as manually flushing the cache and so on.
-	 */
-#ifdef CONFIG_XILINX_UNCACHED_SHADOW
-	ret = (void *)((unsigned) ret | UNCACHED_SHADOW_MASK);
-#endif
-	/* dma_handle is same as physical (shadowed) address */
-	*dma_handle = (dma_addr_t)ret;
-
-	/*
-	 * free wasted pages.  We skip the first page since we know
-	 * that it will have count = 1 and won't require freeing.
-	 * We also mark the pages in use as reserved so that
-	 * remap_page_range works.
-	 */
-	page = virt_to_page(virt);
-	free = page + (size >> PAGE_SHIFT);
-	end  = page + (1 << order);
-
-	for (; page < end; page++) {
-		init_page_count(page);
-		if (page >= free)
-			__free_page(page);
-		else
-			SetPageReserved(page);
-	}
-
-	return ret;
-no_remap:
-	__free_pages(page, order);
-no_page:
-	return NULL;
-}
-
-#else
-
-void *consistent_alloc(int gfp, size_t size, dma_addr_t *dma_handle)
-{
-	int order, err, i;
-	unsigned long page, va, flags;
+#ifdef CONFIG_MMU
 	phys_addr_t pa;
 	struct vm_struct *area;
-	void	 *ret;
+	unsigned long va;
+#endif
 
 	if (in_interrupt())
 		BUG();
@@ -133,71 +79,133 @@
 	size = PAGE_ALIGN(size);
 	order = get_order(size);
 
-	page = __get_free_pages(gfp, order);
-	if (!page) {
-		BUG();
+	vaddr = __get_free_pages(gfp, order);
+	if (!vaddr)
 		return NULL;
-	}
 
 	/*
 	 * we need to ensure that there are no cachelines in use,
 	 * or worse dirty in this area.
 	 */
-	flush_dcache_range(virt_to_phys(page), virt_to_phys(page) + size);
+	flush_dcache_range(virt_to_phys((void *)vaddr),
+					virt_to_phys((void *)vaddr) + size);
 
+#ifndef CONFIG_MMU
+	ret = (void *)vaddr;
+	/*
+	 * Here's the magic!  Note if the uncached shadow is not implemented,
+	 * it's up to the calling code to also test that condition and make
+	 * other arrangements, such as manually flushing the cache and so on.
+	 */
+# ifdef CONFIG_XILINX_UNCACHED_SHADOW
+	ret = (void *)((unsigned) ret | UNCACHED_SHADOW_MASK);
+# endif
+	if ((unsigned int)ret > cpuinfo.dcache_base &&
+				(unsigned int)ret < cpuinfo.dcache_high)
+		printk(KERN_WARNING
+			"ERROR: Your cache coherent area is CACHED!!!\n");
+
+	/* dma_handle is same as physical (shadowed) address */
+	*dma_handle = (dma_addr_t)ret;
+#else
 	/* Allocate some common virtual space to map the new pages. */
 	area = get_vm_area(size, VM_ALLOC);
-	if (area == NULL) {
-		free_pages(page, order);
+	if (!area) {
+		free_pages(vaddr, order);
 		return NULL;
 	}
 	va = (unsigned long) area->addr;
 	ret = (void *)va;
 
 	/* This gives us the real physical address of the first page. */
-	*dma_handle = pa = virt_to_bus((void *)page);
-
-	/* MS: This is the whole magic - use cache inhibit pages */
-	flags = _PAGE_KERNEL | _PAGE_NO_CACHE;
+	*dma_handle = pa = virt_to_bus((void *)vaddr);
+#endif
 
 	/*
-	 * Set refcount=1 on all pages in an order>0
-	 * allocation so that vfree() will actually
-	 * free all pages that were allocated.
+	 * free wasted pages.  We skip the first page since we know
+	 * that it will have count = 1 and won't require freeing.
+	 * We also mark the pages in use as reserved so that
+	 * remap_page_range works.
 	 */
-	if (order > 0) {
-		struct page *rpage = virt_to_page(page);
-		for (i = 1; i < (1 << order); i++)
-			init_page_count(rpage+i);
+	page = virt_to_page(vaddr);
+	end = page + (1 << order);
+
+	split_page(page, order);
+
+	for (i = 0; i < size && err == 0; i += PAGE_SIZE) {
+#ifdef CONFIG_MMU
+		/* MS: This is the whole magic - use cache inhibit pages */
+		err = map_page(va + i, pa + i, _PAGE_KERNEL | _PAGE_NO_CACHE);
+#endif
+
+		SetPageReserved(page);
+		page++;
 	}
 
-	err = 0;
-	for (i = 0; i < size && err == 0; i += PAGE_SIZE)
-		err = map_page(va+i, pa+i, flags);
+	/* Free the otherwise unused pages. */
+	while (page < end) {
+		__free_page(page);
+		page++;
+	}
 
 	if (err) {
-		vfree((void *)va);
+		free_pages(vaddr, order);
 		return NULL;
 	}
 
 	return ret;
 }
-#endif /* CONFIG_MMU */
 EXPORT_SYMBOL(consistent_alloc);
 
 /*
  * free page(s) as defined by the above mapping.
  */
-void consistent_free(void *vaddr)
+void consistent_free(size_t size, void *vaddr)
 {
+	struct page *page;
+
 	if (in_interrupt())
 		BUG();
 
+	size = PAGE_ALIGN(size);
+
+#ifndef CONFIG_MMU
 	/* Clear SHADOW_MASK bit in address, and free as per usual */
-#ifdef CONFIG_XILINX_UNCACHED_SHADOW
+# ifdef CONFIG_XILINX_UNCACHED_SHADOW
 	vaddr = (void *)((unsigned)vaddr & ~UNCACHED_SHADOW_MASK);
+# endif
+	page = virt_to_page(vaddr);
+
+	do {
+		ClearPageReserved(page);
+		__free_page(page);
+		page++;
+	} while (size -= PAGE_SIZE);
+#else
+	do {
+		pte_t *ptep;
+		unsigned long pfn;
+
+		ptep = pte_offset_kernel(pmd_offset(pgd_offset_k(
+						(unsigned int)vaddr),
+					(unsigned int)vaddr),
+				(unsigned int)vaddr);
+		if (!pte_none(*ptep) && pte_present(*ptep)) {
+			pfn = pte_pfn(*ptep);
+			pte_clear(&init_mm, (unsigned int)vaddr, ptep);
+			if (pfn_valid(pfn)) {
+				page = pfn_to_page(pfn);
+
+				ClearPageReserved(page);
+				__free_page(page);
+			}
+		}
+		vaddr += PAGE_SIZE;
+	} while (size -= PAGE_SIZE);
+
+	/* flush tlb */
+	flush_tlb_all();
 #endif
-	vfree(vaddr);
 }
 EXPORT_SYMBOL(consistent_free);