powerpc/mm: don't do tlbie for updatepp request with NO HPTE fault

upatepp can get called for a nohpte fault when we find from the linux
page table that the translation was hashed before. In that case
we are sure that there is no existing translation, hence we could
avoid doing tlbie.

We could possibly race with a parallel fault filling the TLB. But
that should be ok because updatepp is only ever relaxing permissions.
We also look at linux pte permission bits when filling hash pte
permission bits. We also hold the linux pte busy bits while
inserting/updating a hashpte entry, hence a paralle update of
linux pte is not possible. On the other hand mprotect involves
ptep_modify_prot_start which cause a hpte invalidate and not updatepp.

Performance number:
We use randbox_access_bench written by Anton.

Kernel with THP disabled and smaller hash page table size.

    86.60%  random_access_b  [kernel.kallsyms]                [k] .native_hpte_updatepp
     2.10%  random_access_b  random_access_bench              [.] doit
     1.99%  random_access_b  [kernel.kallsyms]                [k] .do_raw_spin_lock
     1.85%  random_access_b  [kernel.kallsyms]                [k] .native_hpte_insert
     1.26%  random_access_b  [kernel.kallsyms]                [k] .native_flush_hash_range
     1.18%  random_access_b  [kernel.kallsyms]                [k] .__delay
     0.69%  random_access_b  [kernel.kallsyms]                [k] .native_hpte_remove
     0.37%  random_access_b  [kernel.kallsyms]                [k] .clear_user_page
     0.34%  random_access_b  [kernel.kallsyms]                [k] .__hash_page_64K
     0.32%  random_access_b  [kernel.kallsyms]                [k] fast_exception_return
     0.30%  random_access_b  [kernel.kallsyms]                [k] .hash_page_mm

With Fix:

    27.54%  random_access_b  random_access_bench              [.] doit
    22.90%  random_access_b  [kernel.kallsyms]                [k] .native_hpte_insert
     5.76%  random_access_b  [kernel.kallsyms]                [k] .native_hpte_remove
     5.20%  random_access_b  [kernel.kallsyms]                [k] fast_exception_return
     5.12%  random_access_b  [kernel.kallsyms]                [k] .__hash_page_64K
     4.80%  random_access_b  [kernel.kallsyms]                [k] .hash_page_mm
     3.31%  random_access_b  [kernel.kallsyms]                [k] data_access_common
     1.84%  random_access_b  [kernel.kallsyms]                [k] .trace_hardirqs_on_caller

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index e5c0919..c8175a3 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -42,7 +42,7 @@
 					 unsigned long newpp, 
 					 unsigned long vpn,
 					 int bpsize, int apsize,
-					 int ssize, int local);
+					 int ssize, unsigned long flags);
 	void            (*hpte_updateboltedpp)(unsigned long newpp, 
 					       unsigned long ea,
 					       int psize, int ssize);
diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index aeebc94..4f13c3e 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -316,27 +316,33 @@
 	return hash & 0x7fffffffffUL;
 }
 
+#define HPTE_LOCAL_UPDATE	0x1
+#define HPTE_NOHPTE_UPDATE	0x2
+
 extern int __hash_page_4K(unsigned long ea, unsigned long access,
 			  unsigned long vsid, pte_t *ptep, unsigned long trap,
-			  unsigned int local, int ssize, int subpage_prot);
+			  unsigned long flags, int ssize, int subpage_prot);
 extern int __hash_page_64K(unsigned long ea, unsigned long access,
 			   unsigned long vsid, pte_t *ptep, unsigned long trap,
-			   unsigned int local, int ssize);
+			   unsigned long flags, int ssize);
 struct mm_struct;
 unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap);
-extern int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap);
-extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap);
+extern int hash_page_mm(struct mm_struct *mm, unsigned long ea,
+			unsigned long access, unsigned long trap,
+			unsigned long flags);
+extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap,
+		     unsigned long dsisr);
 int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
-		     pte_t *ptep, unsigned long trap, int local, int ssize,
-		     unsigned int shift, unsigned int mmu_psize);
+		     pte_t *ptep, unsigned long trap, unsigned long flags,
+		     int ssize, unsigned int shift, unsigned int mmu_psize);
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 extern int __hash_page_thp(unsigned long ea, unsigned long access,
 			   unsigned long vsid, pmd_t *pmdp, unsigned long trap,
-			   int local, int ssize, unsigned int psize);
+			   unsigned long flags, int ssize, unsigned int psize);
 #else
 static inline int __hash_page_thp(unsigned long ea, unsigned long access,
 				  unsigned long vsid, pmd_t *pmdp,
-				  unsigned long trap, int local,
+				  unsigned long trap, unsigned long flags,
 				  int ssize, unsigned int psize)
 {
 	BUG();
diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h
index 4d3ecd8..23d351c 100644
--- a/arch/powerpc/include/asm/tlbflush.h
+++ b/arch/powerpc/include/asm/tlbflush.h
@@ -125,11 +125,11 @@
 
 
 extern void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize,
-			    int ssize, int local);
+			    int ssize, unsigned long flags);
 extern void flush_hash_range(unsigned long number, int local);
 extern void flush_hash_hugepage(unsigned long vsid, unsigned long addr,
 				pmd_t *pmdp, unsigned int psize, int ssize,
-				int local);
+				unsigned long flags);
 
 static inline void local_flush_tlb_mm(struct mm_struct *mm)
 {