MIPS: Optimize pgd_init and pmd_init

On a dual-issue processor, GCC generates code that saves a couple of
clock cycles per loop iteration if we rearrange things slightly.
Checking for p != end instead of p < end saves a SLTU per iteration,
and moving the pointer increment to the middle of the stores can let
it dual issue on multi-issue processors.

Signed-off-by: David Daney <ddaney@caviumnetworks.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/4249/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
diff --git a/arch/mips/mm/pgtable-64.c b/arch/mips/mm/pgtable-64.c
index cda4e30..2540779 100644
--- a/arch/mips/mm/pgtable-64.c
+++ b/arch/mips/mm/pgtable-64.c
@@ -26,17 +26,17 @@
  	p = (unsigned long *) page;
 	end = p + PTRS_PER_PGD;
 
-	while (p < end) {
+	do {
 		p[0] = entry;
 		p[1] = entry;
 		p[2] = entry;
 		p[3] = entry;
 		p[4] = entry;
-		p[5] = entry;
-		p[6] = entry;
-		p[7] = entry;
 		p += 8;
-	}
+		p[-3] = entry;
+		p[-2] = entry;
+		p[-1] = entry;
+	} while (p != end);
 }
 
 #ifndef __PAGETABLE_PMD_FOLDED
@@ -47,17 +47,17 @@
  	p = (unsigned long *) addr;
 	end = p + PTRS_PER_PMD;
 
-	while (p < end) {
+	do {
 		p[0] = pagetable;
 		p[1] = pagetable;
 		p[2] = pagetable;
 		p[3] = pagetable;
 		p[4] = pagetable;
-		p[5] = pagetable;
-		p[6] = pagetable;
-		p[7] = pagetable;
 		p += 8;
-	}
+		p[-3] = pagetable;
+		p[-2] = pagetable;
+		p[-1] = pagetable;
+	} while (p != end);
 }
 #endif