[MIPS] MT: Improved multithreading support.
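    
    Summary of changes:
    
     o arch/mips/mm/fault.c: Trivial whitespace cleanups and a disabled
       (#if 0) diagnostic printk in the SIGBUS path.
    
     o arch/mips/mm/tlb-r4k.c: All TLB maintenance is now bracketed by
       the new ENTER_CRITICAL() / EXIT_CRITICAL() macros.  On ordinary
       kernels these are plain local_irq_save() / local_irq_restore();
       on SMTC kernels, where the TLB is shared between thread contexts,
       they additionally wrap the sequence in dvpe() / evpe().  The
       pattern used throughout the file is, roughly:
    
    	unsigned long flags;
    
    	ENTER_CRITICAL(flags);	/* local_irq_save(); dvpe() on SMTC */
    	/* ... rewrite TLB entries ... */
    	EXIT_CRITICAL(flags);	/* evpe() on SMTC; local_irq_restore() */
    
       Also, probe_tlb() now skips sizing the TLB when an SMTC system
       shares it between VPEs, and the new ntlb= command line argument
       restricts the TLB to a given number of entries.
    
     o arch/mips/mm/tlbex.c: The TLB handler synthesizer learns about
       the coprocessor 0 "select" field, so the generated mfc0/mtc0
       instructions can address registers such as TCBind (register 2,
       select 2), which SMTC uses as its CPU index in the refill
       handlers.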
    
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index 2d9624f..e3a6172 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -157,7 +157,6 @@
 	 * Oops. The kernel tried to access some bad page. We'll have to
 	 * terminate things with extreme prejudice.
 	 */
-
 	bust_spinlocks(1);
 
 	printk(KERN_ALERT "CPU %d Unable to handle kernel paging request at "
@@ -188,11 +187,20 @@
 	/* Kernel mode? Handle exceptions or die */
 	if (!user_mode(regs))
 		goto no_context;
 
 	/*
 	 * Send a sigbus, regardless of whether we were in kernel
 	 * or user mode.
 	 */
+#if 0
+	printk("do_page_fault() #3: sending SIGBUS to %s for "
+	       "invalid %s\n%0*lx (epc == %0*lx, ra == %0*lx)\n",
+	       tsk->comm,
+	       write ? "write access to" : "read access from",
+	       field, address,
+	       field, (unsigned long) regs->cp0_epc,
+	       field, (unsigned long) regs->regs[31]);
+#endif
 	tsk->thread.cp0_badvaddr = address;
 	info.si_signo = SIGBUS;
 	info.si_errno = 0;
@@ -201,7 +209,6 @@
 	force_sig_info(SIGBUS, &info, tsk);
 
 	return;
-
 vmalloc_fault:
 	{
 		/*
diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c
index a865f239..9dca099 100644
--- a/arch/mips/mm/tlb-r4k.c
+++ b/arch/mips/mm/tlb-r4k.c
@@ -32,13 +32,43 @@
 				     "nop; nop; nop; nop; nop; nop;\n\t" \
 				     ".set reorder\n\t")
 
+/* Atomicity and interruptibility */
+#ifdef CONFIG_MIPS_MT_SMTC
+
+#include <asm/smtc.h>
+#include <asm/mipsmtregs.h>
+
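+/*
+ * Under SMTC the TLB is shared by all thread contexts on a VPE (and
+ * possibly between VPEs), so masking local interrupts alone cannot
+ * make a TLB update atomic; dvpe() additionally suspends the other
+ * threads for the duration.  Note that ENTER_CRITICAL opens a block,
+ * giving mvpflags local scope, which EXIT_CRITICAL closes again: the
+ * macros must always be used as a matched pair.
+ */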
+#define ENTER_CRITICAL(flags) \
+	{ \
+	unsigned int mvpflags; \
+	local_irq_save(flags); \
+	mvpflags = dvpe()
+#define EXIT_CRITICAL(flags) \
+	evpe(mvpflags); \
+	local_irq_restore(flags); \
+	}
+#else
+
+#define ENTER_CRITICAL(flags) local_irq_save(flags)
+#define EXIT_CRITICAL(flags) local_irq_restore(flags)
+
+#endif /* CONFIG_MIPS_MT_SMTC */
+
 void local_flush_tlb_all(void)
 {
 	unsigned long flags;
 	unsigned long old_ctx;
 	int entry;
 
-	local_irq_save(flags);
+	ENTER_CRITICAL(flags);
 	/* Save old context and create impossible VPN2 value */
 	old_ctx = read_c0_entryhi();
 	write_c0_entrylo0(0);
@@ -57,7 +79,7 @@
 	}
 	tlbw_use_hazard();
 	write_c0_entryhi(old_ctx);
-	local_irq_restore(flags);
+	EXIT_CRITICAL(flags);
 }
 
 /* All entries common to a mm share an asid.  To effectively flush
@@ -87,6 +109,6 @@
 		unsigned long flags;
 		int size;
 
+		ENTER_CRITICAL(flags);
 		size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
 		size = (size + 1) >> 1;
-		local_irq_save(flags);
@@ -120,7 +143,7 @@
 		} else {
 			drop_mmu_context(mm, cpu);
 		}
-		local_irq_restore(flags);
+		EXIT_CRITICAL(flags);
 	}
 }
 
@@ -129,9 +152,9 @@
 	unsigned long flags;
 	int size;
 
+	ENTER_CRITICAL(flags);
 	size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
 	size = (size + 1) >> 1;
-	local_irq_save(flags);
 	if (size <= current_cpu_data.tlbsize / 2) {
 		int pid = read_c0_entryhi();
 
@@ -162,7 +185,7 @@
 	} else {
 		local_flush_tlb_all();
 	}
-	local_irq_restore(flags);
+	EXIT_CRITICAL(flags);
 }
 
 void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
@@ -175,7 +198,7 @@
 
 		newpid = cpu_asid(cpu, vma->vm_mm);
 		page &= (PAGE_MASK << 1);
-		local_irq_save(flags);
+		ENTER_CRITICAL(flags);
 		oldpid = read_c0_entryhi();
 		write_c0_entryhi(page | newpid);
 		mtc0_tlbw_hazard();
@@ -194,7 +217,7 @@
 
 	finish:
 		write_c0_entryhi(oldpid);
-		local_irq_restore(flags);
+		EXIT_CRITICAL(flags);
 	}
 }
 
@@ -207,7 +230,7 @@
 	unsigned long flags;
 	int oldpid, idx;
 
-	local_irq_save(flags);
+	ENTER_CRITICAL(flags);
 	oldpid = read_c0_entryhi();
 	page &= (PAGE_MASK << 1);
 	write_c0_entryhi(page);
@@ -226,7 +249,7 @@
 	}
 	write_c0_entryhi(oldpid);
 
-	local_irq_restore(flags);
+	EXIT_CRITICAL(flags);
 }
 
 /*
@@ -249,7 +272,7 @@
 	if (current->active_mm != vma->vm_mm)
 		return;
 
-	local_irq_save(flags);
+	ENTER_CRITICAL(flags);
 
 	pid = read_c0_entryhi() & ASID_MASK;
 	address &= (PAGE_MASK << 1);
@@ -277,7 +300,7 @@
 	else
 		tlb_write_indexed();
 	tlbw_use_hazard();
-	local_irq_restore(flags);
+	EXIT_CRITICAL(flags);
 }
 
 #if 0
@@ -291,7 +314,7 @@
 	pte_t *ptep;
 	int idx;
 
-	local_irq_save(flags);
+	ENTER_CRITICAL(flags);
 	address &= (PAGE_MASK << 1);
 	asid = read_c0_entryhi() & ASID_MASK;
 	write_c0_entryhi(address | asid);
@@ -310,7 +333,7 @@
 	else
 		tlb_write_indexed();
 	tlbw_use_hazard();
-	local_irq_restore(flags);
+	EXIT_CRITICAL(flags);
 }
 #endif
 
@@ -322,7 +345,7 @@
 	unsigned long old_pagemask;
 	unsigned long old_ctx;
 
-	local_irq_save(flags);
+	ENTER_CRITICAL(flags);
 	/* Save old context and create impossible VPN2 value */
 	old_ctx = read_c0_entryhi();
 	old_pagemask = read_c0_pagemask();
@@ -342,7 +365,7 @@
 	BARRIER;
 	write_c0_pagemask(old_pagemask);
 	local_flush_tlb_all();
-	local_irq_restore(flags);
+	EXIT_CRITICAL(flags);
 }
 
 /*
@@ -362,7 +385,7 @@
 	unsigned long old_pagemask;
 	unsigned long old_ctx;
 
-	local_irq_save(flags);
+	ENTER_CRITICAL(flags);
 	/* Save old context and create impossible VPN2 value */
 	old_ctx = read_c0_entryhi();
 	old_pagemask = read_c0_pagemask();
@@ -386,10 +409,11 @@
 	write_c0_entryhi(old_ctx);
 	write_c0_pagemask(old_pagemask);
 out:
-	local_irq_restore(flags);
+	EXIT_CRITICAL(flags);
 	return ret;
 }
 
+extern void __init sanitize_tlb_entries(void);
 static void __init probe_tlb(unsigned long config)
 {
 	struct cpuinfo_mips *c = &current_cpu_data;
@@ -402,6 +426,14 @@
 	 */
 	if ((c->processor_id & 0xff0000) == PRID_COMP_LEGACY)
 		return;
+#ifdef CONFIG_MIPS_MT_SMTC
+	/*
+	 * If the TLB is shared between VPEs in an SMTC system, the total
+	 * size has already been calculated and stored in cpu_data tlbsize.
+	 */
+	if ((smtc_status & SMTC_TLB_SHARED) == SMTC_TLB_SHARED)
+		return;
+#endif /* CONFIG_MIPS_MT_SMTC */
 
 	reg = read_c0_config1();
 	if (!((config >> 7) & 3))
@@ -410,6 +442,15 @@
 	c->tlbsize = ((reg >> 25) & 0x3f) + 1;
 }
 
+static int ntlb __initdata;
+static int __init set_ntlb(char *str)
+{
+	get_option(&str, &ntlb);
+	return 1;
+}
+
+__setup("ntlb=", set_ntlb);
+
 void __init tlb_init(void)
 {
 	unsigned int config = read_c0_config();
@@ -432,5 +473,20 @@
 
 	/* Did I tell you that ARC SUCKS?  */
 
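+	/*
+	 * The ntlb= command line argument limits random TLB replacement
+	 * to the last ntlb entries by marking the rest wired; this can
+	 * be handy e.g. when debugging the TLB handlers.
+	 */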
+	if (ntlb) {
+		if (ntlb > 1 && ntlb <= current_cpu_data.tlbsize) {
+			int wired = current_cpu_data.tlbsize - ntlb;
+			write_c0_wired(wired);
+			write_c0_index(wired - 1);
+			printk(KERN_INFO "Restricting TLB to %d entries\n", ntlb);
+		} else
+			printk(KERN_WARNING "Ignoring invalid argument ntlb=%d\n", ntlb);
+	}
+
 	build_tlb_refill_handler();
 }
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index c5eea6a..053dbac 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -7,6 +7,16 @@
  *
  * Copyright (C) 2004,2005 by Thiemo Seufer
  * Copyright (C) 2005  Maciej W. Rozycki
+ * Copyright (C) 2006  Ralf Baechle (ralf@linux-mips.org)
+ *
+ * ... and the days got worse and worse and now you see
+ * I've gone completely out of my mind.
+ *
+ * They're coming to take me away haha
+ * they're coming to take me away hoho hihi haha
+ * to the funny farm where code is beautiful all the time ...
+ *
+ * (Condolences to Napoleon XIV)
  */
 
 #include <stdarg.h>
@@ -68,6 +78,7 @@
 	BIMM = 0x040,
 	JIMM = 0x080,
 	FUNC = 0x100,
+	SET = 0x200
 };
 
 #define OP_MASK		0x2f
@@ -86,6 +97,9 @@
 #define JIMM_SH		0
 #define FUNC_MASK	0x2f
 #define FUNC_SH		0
+#define SET_MASK	0x7
+#define SET_SH		0
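+/* The select ("set") field occupies bits 2:0 of a CP0 opcode. */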
 
 enum opcode {
 	insn_invalid,
@@ -129,8 +142,8 @@
 	{ insn_bne, M(bne_op,0,0,0,0,0), RS | RT | BIMM },
 	{ insn_daddiu, M(daddiu_op,0,0,0,0,0), RS | RT | SIMM },
 	{ insn_daddu, M(spec_op,0,0,0,0,daddu_op), RS | RT | RD },
-	{ insn_dmfc0, M(cop0_op,dmfc_op,0,0,0,0), RT | RD },
-	{ insn_dmtc0, M(cop0_op,dmtc_op,0,0,0,0), RT | RD },
+	{ insn_dmfc0, M(cop0_op,dmfc_op,0,0,0,0), RT | RD | SET },
+	{ insn_dmtc0, M(cop0_op,dmtc_op,0,0,0,0), RT | RD | SET },
 	{ insn_dsll, M(spec_op,0,0,0,0,dsll_op), RT | RD | RE },
 	{ insn_dsll32, M(spec_op,0,0,0,0,dsll32_op), RT | RD | RE },
 	{ insn_dsra, M(spec_op,0,0,0,0,dsra_op), RT | RD | RE },
@@ -145,8 +158,8 @@
 	{ insn_lld, M(lld_op,0,0,0,0,0), RS | RT | SIMM },
 	{ insn_lui, M(lui_op,0,0,0,0,0), RT | SIMM },
 	{ insn_lw, M(lw_op,0,0,0,0,0), RS | RT | SIMM },
-	{ insn_mfc0, M(cop0_op,mfc_op,0,0,0,0), RT | RD },
-	{ insn_mtc0, M(cop0_op,mtc_op,0,0,0,0), RT | RD },
+	{ insn_mfc0, M(cop0_op,mfc_op,0,0,0,0), RT | RD | SET },
+	{ insn_mtc0, M(cop0_op,mtc_op,0,0,0,0), RT | RD | SET },
 	{ insn_ori, M(ori_op,0,0,0,0,0), RS | RT | UIMM },
 	{ insn_rfe, M(cop0_op,cop_op,0,0,0,rfe_op), 0 },
 	{ insn_sc, M(sc_op,0,0,0,0,0), RS | RT | SIMM },
@@ -242,6 +255,14 @@
 	return arg & FUNC_MASK;
 }
 
+static __init u32 build_set(u32 arg)
+{
+	if (arg & ~SET_MASK)
+		printk(KERN_WARNING "TLB synthesizer field overflow\n");
+
+	return arg & SET_MASK;
+}
+
 /*
  * The order of opcode arguments is implicitly left to right,
  * starting with RS and ending with FUNC or IMM.
@@ -273,6 +294,7 @@
 	if (ip->fields & BIMM) op |= build_bimm(va_arg(ap, s32));
 	if (ip->fields & JIMM) op |= build_jimm(va_arg(ap, u32));
 	if (ip->fields & FUNC) op |= build_func(va_arg(ap, u32));
+	if (ip->fields & SET) op |= build_set(va_arg(ap, u32));
 	va_end(ap);
 
 	**buf = op;
@@ -358,8 +380,8 @@
 I_u1s2(_bltz);
 I_u1s2(_bltzl);
 I_u1u2s3(_bne);
-I_u1u2(_dmfc0);
-I_u1u2(_dmtc0);
+I_u1u2u3(_dmfc0);
+I_u1u2u3(_dmtc0);
 I_u2u1s3(_daddiu);
 I_u3u1u2(_daddu);
 I_u2u1u3(_dsll);
@@ -376,8 +398,8 @@
 I_u2s3u1(_lld);
 I_u1s2(_lui);
 I_u2s3u1(_lw);
-I_u1u2(_mfc0);
-I_u1u2(_mtc0);
+I_u1u2u3(_mfc0);
+I_u1u2u3(_mtc0);
 I_u2u1u3(_ori);
 I_0(_rfe);
 I_u2s3u1(_sc);
@@ -451,8 +473,8 @@
 # define i_SLL(buf, rs, rt, sh) i_dsll(buf, rs, rt, sh)
 # define i_SRA(buf, rs, rt, sh) i_dsra(buf, rs, rt, sh)
 # define i_SRL(buf, rs, rt, sh) i_dsrl(buf, rs, rt, sh)
-# define i_MFC0(buf, rt, rd) i_dmfc0(buf, rt, rd)
-# define i_MTC0(buf, rt, rd) i_dmtc0(buf, rt, rd)
+# define i_MFC0(buf, rt, rd...) i_dmfc0(buf, rt, rd)
+# define i_MTC0(buf, rt, rd...) i_dmtc0(buf, rt, rd)
 # define i_ADDIU(buf, rs, rt, val) i_daddiu(buf, rs, rt, val)
 # define i_ADDU(buf, rs, rt, rd) i_daddu(buf, rs, rt, rd)
 # define i_SUBU(buf, rs, rt, rd) i_dsubu(buf, rs, rt, rd)
@@ -464,8 +486,8 @@
 # define i_SLL(buf, rs, rt, sh) i_sll(buf, rs, rt, sh)
 # define i_SRA(buf, rs, rt, sh) i_sra(buf, rs, rt, sh)
 # define i_SRL(buf, rs, rt, sh) i_srl(buf, rs, rt, sh)
-# define i_MFC0(buf, rt, rd) i_mfc0(buf, rt, rd)
-# define i_MTC0(buf, rt, rd) i_mtc0(buf, rt, rd)
+# define i_MFC0(buf, rt, rd...) i_mfc0(buf, rt, rd)
+# define i_MTC0(buf, rt, rd...) i_mtc0(buf, rt, rd)
 # define i_ADDIU(buf, rs, rt, val) i_addiu(buf, rs, rt, val)
 # define i_ADDU(buf, rs, rt, rd) i_addu(buf, rs, rt, rd)
 # define i_SUBU(buf, rs, rt, rd) i_subu(buf, rs, rt, rd)
@@ -670,14 +692,20 @@
 #define K1		27
 
 /* Some CP0 registers */
-#define C0_INDEX	0
-#define C0_ENTRYLO0	2
-#define C0_ENTRYLO1	3
-#define C0_CONTEXT	4
-#define C0_BADVADDR	8
-#define C0_ENTRYHI	10
-#define C0_EPC		14
-#define C0_XCONTEXT	20
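+/*
+ * Each definition below expands to a "register, select" pair; the
+ * variadic i_MFC0()/i_MTC0() wrappers pass the select on to the
+ * synthesizer's SET field.
+ */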
+#define C0_INDEX	0, 0
+#define C0_ENTRYLO0	2, 0
+#define C0_TCBIND	2, 2
+#define C0_ENTRYLO1	3, 0
+#define C0_CONTEXT	4, 0
+#define C0_BADVADDR	8, 0
+#define C0_ENTRYHI	10, 0
+#define C0_EPC		14, 0
+#define C0_XCONTEXT	20, 0
 
 #ifdef CONFIG_64BIT
 # define GET_CONTEXT(buf, reg) i_MFC0(buf, reg, C0_XCONTEXT)
@@ -951,12 +974,20 @@
 	/* No i_nop needed here, since the next insn doesn't touch TMP. */
 
 #ifdef CONFIG_SMP
+# ifdef CONFIG_MIPS_MT_SMTC
+	/*
+	 * SMTC uses TCBind value as "CPU" index
+	 */
+	i_mfc0(p, ptr, C0_TCBIND);
+	i_dsrl(p, ptr, ptr, 19);
+# else
 	/*
 	 * 64 bit SMP running in XKPHYS has smp_processor_id() << 3
 	 * stored in CONTEXT.
 	 */
 	i_dmfc0(p, ptr, C0_CONTEXT);
 	i_dsrl(p, ptr, ptr, 23);
+# endif
 	i_LA_mostly(p, tmp, pgdc);
 	i_daddu(p, ptr, ptr, tmp);
 	i_dmfc0(p, tmp, C0_BADVADDR);
@@ -1014,9 +1045,21 @@
 
 	/* 32 bit SMP has smp_processor_id() stored in CONTEXT. */
 #ifdef CONFIG_SMP
+#ifdef CONFIG_MIPS_MT_SMTC
+	/*
+	 * SMTC uses TCBind value as "CPU" index
+	 */
+	i_mfc0(p, ptr, C0_TCBIND);
+	i_LA_mostly(p, tmp, pgdc);
+	i_srl(p, ptr, ptr, 19);
+#else
+	/*
+	 * smp_processor_id() << 3 is stored in CONTEXT.
+	 */
 	i_mfc0(p, ptr, C0_CONTEXT);
 	i_LA_mostly(p, tmp, pgdc);
 	i_srl(p, ptr, ptr, 23);
+#endif
 	i_addu(p, ptr, tmp, ptr);
 #else
 	i_LA_mostly(p, ptr, pgdc);