powerpc/8xx: Fix regression introduced by cache coherency rewrite

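After upgrading to the latest kernel on my mpc875, userspace started
running incredibly slowly (hours to get to a shell, even!).
I tracked it down to commit 8d30c14cab30d405a05f2aaceda1e9ad57800f36,
which removed a workaround for the 8xx: invalidating the TLB entry for
the address before flushing the caches, so that "dcbst" does not fault
as a write on an unpopulated TLB entry. Adding the workaround back
makes the problem go away.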

Signed-off-by: Rex Feany <rfeany@mrv.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 83f1551..5304093 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -30,6 +30,8 @@
 #include <asm/tlbflush.h>
 #include <asm/tlb.h>
 
+#include "mmu_decl.h"
+
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 
 #ifdef CONFIG_SMP
@@ -166,7 +168,7 @@
  * support falls into the same category.
  */
 
-static pte_t set_pte_filter(pte_t pte)
+static pte_t set_pte_filter(pte_t pte, unsigned long addr)
 {
 	pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
 	if (pte_looks_normal(pte) && !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) ||
@@ -175,6 +177,17 @@
 		if (!pg)
 			return pte;
 		if (!test_bit(PG_arch_1, &pg->flags)) {
+#ifdef CONFIG_8xx
+			/* On 8xx, cache control instructions (particularly
+			 * "dcbst" from flush_dcache_icache) fault as write
+			 * operation if there is an unpopulated TLB entry
+			 * for the address in question. To workaround that,
+			 * we invalidate the TLB here, thus avoiding dcbst
+			 * misbehaviour.
+			 */
+			/* 8xx doesn't care about PID, size or ind args */
+			_tlbil_va(addr, 0, 0, 0);
+#endif /* CONFIG_8xx */
 			flush_dcache_icache_page(pg);
 			set_bit(PG_arch_1, &pg->flags);
 		}
@@ -194,7 +207,7 @@
  * as we don't have two bits to spare for _PAGE_EXEC and _PAGE_HWEXEC so
  * instead we "filter out" the exec permission for non clean pages.
  */
-static pte_t set_pte_filter(pte_t pte)
+static pte_t set_pte_filter(pte_t pte, unsigned long addr)
 {
 	struct page *pg;
 
@@ -276,7 +289,7 @@
 	 * this context might not have been activated yet when this
 	 * is called.
 	 */
-	pte = set_pte_filter(pte);
+	pte = set_pte_filter(pte, addr);
 
 	/* Perform the setting of the PTE */
 	__set_pte_at(mm, addr, ptep, pte, 0);