[MIPS] Fix 32bit kernels on R4k with 128 byte cache line size

The generated copy_page for R4k CPU with a 128 byte cache line size used
Create Dirty Exclusive cache line operations even if only part of the
cache line was filled.  This change avoids generating cache operations,
if only part of the cache line size is copied in one loop. It also
increases the maximum loop size, because the generated code even fits
into the available space for r4k CPUs with 128 byte cache line size.

Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c
index 1edf0cb..1417c64 100644
--- a/arch/mips/mm/page.c
+++ b/arch/mips/mm/page.c
@@ -235,13 +235,12 @@
 	}
 	/*
 	 * Too much unrolling will overflow the available space in
-	 * clear_space_array / copy_page_array. 8 words sounds generous,
-	 * but a R4000 with 128 byte L2 line length can exceed even that.
+	 * clear_space_array / copy_page_array.
 	 */
-	half_clear_loop_size = min(8 * clear_word_size,
+	half_clear_loop_size = min(16 * clear_word_size,
 				   max(cache_line_size >> 1,
 				       4 * clear_word_size));
-	half_copy_loop_size = min(8 * copy_word_size,
+	half_copy_loop_size = min(16 * copy_word_size,
 				  max(cache_line_size >> 1,
 				      4 * copy_word_size));
 }
@@ -263,21 +262,23 @@
 	if (pref_bias_clear_store) {
 		uasm_i_pref(buf, pref_dst_mode, pref_bias_clear_store + off,
 			    A0);
-	} else if (cpu_has_cache_cdex_s) {
-		uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
-	} else if (cpu_has_cache_cdex_p) {
-		if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) {
-			uasm_i_nop(buf);
-			uasm_i_nop(buf);
-			uasm_i_nop(buf);
-			uasm_i_nop(buf);
+	} else if (cache_line_size == (half_clear_loop_size << 1)) {
+		if (cpu_has_cache_cdex_s) {
+			uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
+		} else if (cpu_has_cache_cdex_p) {
+			if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) {
+				uasm_i_nop(buf);
+				uasm_i_nop(buf);
+				uasm_i_nop(buf);
+				uasm_i_nop(buf);
+			}
+
+			if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
+				uasm_i_lw(buf, ZERO, ZERO, AT);
+
+			uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
 		}
-
-		if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
-			uasm_i_lw(buf, ZERO, ZERO, AT);
-
-		uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
-	}
+		}
 }
 
 void __cpuinit build_clear_page(void)
@@ -403,20 +404,22 @@
 	if (pref_bias_copy_store) {
 		uasm_i_pref(buf, pref_dst_mode, pref_bias_copy_store + off,
 			    A0);
-	} else if (cpu_has_cache_cdex_s) {
-		uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
-	} else if (cpu_has_cache_cdex_p) {
-		if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) {
-			uasm_i_nop(buf);
-			uasm_i_nop(buf);
-			uasm_i_nop(buf);
-			uasm_i_nop(buf);
+	} else if (cache_line_size == (half_copy_loop_size << 1)) {
+		if (cpu_has_cache_cdex_s) {
+			uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
+		} else if (cpu_has_cache_cdex_p) {
+			if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) {
+				uasm_i_nop(buf);
+				uasm_i_nop(buf);
+				uasm_i_nop(buf);
+				uasm_i_nop(buf);
+			}
+
+			if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
+				uasm_i_lw(buf, ZERO, ZERO, AT);
+
+			uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
 		}
-
-		if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
-			uasm_i_lw(buf, ZERO, ZERO, AT);
-
-		uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
 	}
 }