[MIPS] Load modules to CKSEG0 if CONFIG_BUILD_ELF64=n

This is a patch to load 64-bit modules to CKSEG0 so that can be
compiled with -msym32 option.  This makes each module ~10% smaller.

* introduce MODULE_START and MODULE_END
* custom module_alloc()
* PGD for modules
* change XTLB refill handler synthesizer
* enable -msym32 for modules again
  (revert ca78b1a5c6a6e70e052d3ea253828e49b5d07c8a)

New XTLB refill handler looks like this:

80000080 dmfc0   k0,C0_BADVADDR
80000084 bltz    k0,800000e4			# goto l_module_alloc
80000088 lui     k1,0x8046			# %high(pgd_current)
8000008c ld      k1,24600(k1)			# %low(pgd_current)
80000090 dsrl    k0,k0,0x1b			# l_vmalloc_done:
80000094 andi    k0,k0,0x1ff8
80000098 daddu   k1,k1,k0
8000009c dmfc0   k0,C0_BADVADDR
800000a0 ld      k1,0(k1)
800000a4 dsrl    k0,k0,0x12
800000a8 andi    k0,k0,0xff8
800000ac daddu   k1,k1,k0
800000b0 dmfc0   k0,C0_XCONTEXT
800000b4 ld      k1,0(k1)
800000b8 andi    k0,k0,0xff0
800000bc daddu   k1,k1,k0
800000c0 ld      k0,0(k1)
800000c4 ld      k1,8(k1)
800000c8 dsrl    k0,k0,0x6
800000cc mtc0    k0,C0_ENTRYLO0
800000d0 dsrl    k1,k1,0x6
800000d4 mtc0    k1,C0_ENTRYL01
800000d8 nop
800000dc tlbwr
800000e0 eret
800000e4 dsll    k1,k0,0x2			# l_module_alloc:
800000e8 bgez    k1,80000008			# goto l_vmalloc
800000ec lui     k1,0xc000
800000f0 dsubu   k0,k0,k1
800000f4 lui     k1,0x8046			# %high(module_pg_dir)
800000f8 beq     zero,zero,80000000
800000fc nop
80000000 beq     zero,zero,80000090		# goto l_vmalloc_done
80000004 daddiu  k1,k1,0x4000
80000008 dsll32  k1,k1,0x0			# l_vmalloc:
8000000c dsubu   k0,k0,k1
80000010 beq     zero,zero,80000090		# goto l_vmalloc_done
80000014 lui     k1,0x8046			# %high(swapper_pg_dir)

Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index fec318a..492c518 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -423,6 +423,9 @@
 	label_invalid,
 	label_second_part,
 	label_leave,
+#ifdef MODULE_START
+	label_module_alloc,
+#endif
 	label_vmalloc,
 	label_vmalloc_done,
 	label_tlbw_hazard,
@@ -455,6 +458,9 @@
 
 L_LA(_second_part)
 L_LA(_leave)
+#ifdef MODULE_START
+L_LA(_module_alloc)
+#endif
 L_LA(_vmalloc)
 L_LA(_vmalloc_done)
 L_LA(_tlbw_hazard)
@@ -686,6 +692,13 @@
 	i_bgezl(p, reg, 0);
 }
 
+static void __init __attribute__((unused))
+il_bgez(u32 **p, struct reloc **r, unsigned int reg, enum label_id l)
+{
+	r_mips_pc16(r, *p, l);
+	i_bgez(p, reg, 0);
+}
+
 /* The only general purpose registers allowed in TLB handlers. */
 #define K0		26
 #define K1		27
@@ -970,7 +983,11 @@
 	 * The vmalloc handling is not in the hotpath.
 	 */
 	i_dmfc0(p, tmp, C0_BADVADDR);
+#ifdef MODULE_START
+	il_bltz(p, r, tmp, label_module_alloc);
+#else
 	il_bltz(p, r, tmp, label_vmalloc);
+#endif
 	/* No i_nop needed here, since the next insn doesn't touch TMP. */
 
 #ifdef CONFIG_SMP
@@ -1023,8 +1040,46 @@
 {
 	long swpd = (long)swapper_pg_dir;
 
+#ifdef MODULE_START
+	long modd = (long)module_pg_dir;
+
+	l_module_alloc(l, *p);
+	/*
+	 * Assumption:
+	 * VMALLOC_START >= 0xc000000000000000UL
+	 * MODULE_START >= 0xe000000000000000UL
+	 */
+	i_SLL(p, ptr, bvaddr, 2);
+	il_bgez(p, r, ptr, label_vmalloc);
+
+	if (in_compat_space_p(MODULE_START) && !rel_lo(MODULE_START)) {
+		i_lui(p, ptr, rel_hi(MODULE_START)); /* delay slot */
+	} else {
+		/* unlikely configuration */
+		i_nop(p); /* delay slot */
+		i_LA(p, ptr, MODULE_START);
+	}
+	i_dsubu(p, bvaddr, bvaddr, ptr);
+
+	if (in_compat_space_p(modd) && !rel_lo(modd)) {
+		il_b(p, r, label_vmalloc_done);
+		i_lui(p, ptr, rel_hi(modd));
+	} else {
+		i_LA_mostly(p, ptr, modd);
+		il_b(p, r, label_vmalloc_done);
+		i_daddiu(p, ptr, ptr, rel_lo(modd));
+	}
+
+	l_vmalloc(l, *p);
+	if (in_compat_space_p(MODULE_START) && !rel_lo(MODULE_START) &&
+	    MODULE_START << 32 == VMALLOC_START)
+		i_dsll32(p, ptr, ptr, 0);	/* typical case */
+	else
+		i_LA(p, ptr, VMALLOC_START);
+#else
 	l_vmalloc(l, *p);
 	i_LA(p, ptr, VMALLOC_START);
+#endif
 	i_dsubu(p, bvaddr, bvaddr, ptr);
 
 	if (in_compat_space_p(swpd) && !rel_lo(swpd)) {