[POWERPC] Allow drivers to map individual 4k pages to userspace

Some drivers have resources that they want to be able to map into
userspace that are 4k in size.  On a kernel configured with 64k pages
we currently end up mapping the 4k we want plus another 60k of
physical address space, which could contain anything.  This can
introduce security problems, for example in the case of an infiniband
adaptor where the other 60k could contain registers that some other
program is using for its communications.

This patch adds a new function, remap_4k_pfn, which drivers can use to
map a single 4k page to userspace regardless of whether the kernel is
using a 4k or a 64k page size.  Like remap_pfn_range, it would
typically be called in a driver's mmap function.  It only maps a
single 4k page, which on a 64k page kernel appears replicated 16 times
throughout a 64k page.  On a 4k page kernel it reduces to a call to
remap_pfn_range.

The way this works on a 64k kernel is that a new bit, _PAGE_4K_PFN,
gets set on the linux PTE.  This alters the way that __hash_page_4K
computes the real address to put in the HPTE.  The RPN field of the
linux PTE becomes the 4k RPN directly rather than being interpreted as
a 64k RPN.  Since the RPN field is 32 bits, this means that physical
addresses being mapped with remap_4k_pfn have to be below 2^44,
i.e. 0x100000000000.

The patch also factors out the code in arch/powerpc/mm/hash_utils_64.c
that deals with demoting a process to use 4k pages into one function
that gets called in the various different places where we need to do
that.  There were some discrepancies between exactly what was done in
the various places, such as a call to spu_flush_all_slbs in one case
but not in others.

Signed-off-by: Paul Mackerras <paulus@samba.org>
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 3c7fe2c..aae0853 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -573,6 +573,27 @@
 	return pp;
 }
 
+/*
+ * Demote a segment to using 4k pages.
+ * For now this makes the whole process use 4k pages.
+ */
+void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
+{
+#ifdef CONFIG_PPC_64K_PAGES
+	if (mm->context.user_psize == MMU_PAGE_4K)
+		return;
+	mm->context.user_psize = MMU_PAGE_4K;
+	mm->context.sllp = SLB_VSID_USER | mmu_psize_defs[MMU_PAGE_4K].sllp;
+	get_paca()->context = mm->context;
+	slb_flush_and_rebolt();
+#ifdef CONFIG_SPE_BASE
+	spu_flush_all_slbs(mm);
+#endif
+#endif
+}
+
+EXPORT_SYMBOL_GPL(demote_segment_4k);
+
 /* Result code is:
  *  0 - handled
  *  1 - normal page fault
@@ -665,15 +686,19 @@
 #ifndef CONFIG_PPC_64K_PAGES
 	rc = __hash_page_4K(ea, access, vsid, ptep, trap, local);
 #else
+	/* If _PAGE_4K_PFN is set, make sure this is a 4k segment */
+	if (pte_val(*ptep) & _PAGE_4K_PFN) {
+		demote_segment_4k(mm, ea);
+		psize = MMU_PAGE_4K;
+	}
+
 	if (mmu_ci_restrictions) {
 		/* If this PTE is non-cacheable, switch to 4k */
 		if (psize == MMU_PAGE_64K &&
 		    (pte_val(*ptep) & _PAGE_NO_CACHE)) {
 			if (user_region) {
+				demote_segment_4k(mm, ea);
 				psize = MMU_PAGE_4K;
-				mm->context.user_psize = MMU_PAGE_4K;
-				mm->context.sllp = SLB_VSID_USER |
-					mmu_psize_defs[MMU_PAGE_4K].sllp;
 			} else if (ea < VMALLOC_END) {
 				/*
 				 * some driver did a non-cacheable mapping
@@ -756,16 +781,8 @@
 	if (mmu_ci_restrictions) {
 		/* If this PTE is non-cacheable, switch to 4k */
 		if (mm->context.user_psize == MMU_PAGE_64K &&
-		    (pte_val(*ptep) & _PAGE_NO_CACHE)) {
-			mm->context.user_psize = MMU_PAGE_4K;
-			mm->context.sllp = SLB_VSID_USER |
-				mmu_psize_defs[MMU_PAGE_4K].sllp;
-			get_paca()->context = mm->context;
-			slb_flush_and_rebolt();
-#ifdef CONFIG_SPE_BASE
-			spu_flush_all_slbs(mm);
-#endif
-		}
+		    (pte_val(*ptep) & _PAGE_NO_CACHE))
+			demote_segment_4k(mm, ea);
 	}
 	if (mm->context.user_psize == MMU_PAGE_64K)
 		__hash_page_64K(ea, access, vsid, ptep, trap, local);