[SPARC64]: Add infrastructure for dynamic TSB sizing.

This also cleans up tsb_context_switch().  The assembler
routine is now __tsb_context_switch() and the former is
an inline function that picks out the bits from the mm_struct
and passes them into the assembler code as arguments.
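
For reference, the wrapper reduces to the following (a sketch; the
authoritative definition is in the include/asm-sparc64/mmu_context.h
hunk below):

	static inline void tsb_context_switch(struct mm_struct *mm)
	{
		/* Unpack the per-mm TSB state and hand it to the
		 * assembler routine as plain scalar arguments.
		 */
		__tsb_context_switch(__pa(mm->pgd),
				     mm->context.tsb_reg_val,
				     mm->context.tsb_map_vaddr,
				     mm->context.tsb_map_pte);
	}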

setup_tsb_params() computes the locked TLB entry to map the
TSB.  Later, when we support the physical address quad load
instructions of Cheetah+ and beyond, we'll simply use the
physical address for the TSB register value and set the map
virtual address and PTE both to zero.
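
To illustrate the TSB register encoding (a sketch only, using a
hypothetical helper that is not part of this patch): the low bits
of the register hold the size field, log2(tsb_bytes / 8192), and
the remaining bits hold the mapping base plus the TSB's offset
within that page, exactly as setup_tsb_params() below computes:

	/* Hypothetical illustration; see setup_tsb_params(). */
	static unsigned long tsb_size_field(unsigned long tsb_bytes)
	{
		unsigned long sz = 0;

		/* 8K TSB -> 0x0, 16K -> 0x1, ..., 1MB -> 0x7 */
		while ((8192UL << sz) < tsb_bytes)
			sz++;
		return sz;
	}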

Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/arch/sparc64/kernel/binfmt_aout32.c b/arch/sparc64/kernel/binfmt_aout32.c
index a57d7f2..181c8cd 100644
--- a/arch/sparc64/kernel/binfmt_aout32.c
+++ b/arch/sparc64/kernel/binfmt_aout32.c
@@ -330,8 +330,7 @@
 
 	current->mm->start_stack =
 		(unsigned long) create_aout32_tables((char __user *)bprm->p, bprm);
-	tsb_context_switch(__pa(current->mm->pgd),
-	                   current->mm->context.sparc64_tsb);
+	tsb_context_switch(mm);
 
 	start_thread32(regs, ex.a_entry, current->mm->start_stack);
 	if (current->ptrace & PT_PTRACED)
diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c
index 2784aab..26548fc 100644
--- a/arch/sparc64/kernel/process.c
+++ b/arch/sparc64/kernel/process.c
@@ -441,8 +441,7 @@
 
 	mm = t->task->mm;
 	if (mm)
-		tsb_context_switch(__pa(mm->pgd),
-				   mm->context.sparc64_tsb);
+		tsb_context_switch(mm);
 
 	set_thread_wsaved(0);
 
diff --git a/arch/sparc64/kernel/tsb.S b/arch/sparc64/kernel/tsb.S
index 76f2c0b..fe266ba 100644
--- a/arch/sparc64/kernel/tsb.S
+++ b/arch/sparc64/kernel/tsb.S
@@ -130,48 +130,36 @@
 	 * schedule() time.
 	 *
 	 * %o0: page table physical address
-	 * %o1:	TSB address
+	 * %o1:	TSB register value
+	 * %o2:	TSB virtual address
+	 * %o3:	TSB mapping locked PTE
+	 *
+	 * We have to run this whole thing with interrupts
+	 * disabled so that the current cpu doesn't change
+	 * due to preemption.
 	 */
 	.align	32
-	.globl	tsb_context_switch
-tsb_context_switch:
+	.globl	__tsb_context_switch
+__tsb_context_switch:
 	rdpr	%pstate, %o5
 	wrpr	%o5, PSTATE_IE, %pstate
 
-	ldub	[%g6 + TI_CPU], %o3
-	sethi	%hi(trap_block), %o4
-	sllx	%o3, TRAP_BLOCK_SZ_SHIFT, %o3
-	or	%o4, %lo(trap_block), %o4
-	add	%o4, %o3, %o4
-	stx	%o0, [%o4 + TRAP_PER_CPU_PGD_PADDR]
+	ldub	[%g6 + TI_CPU], %g1
+	sethi	%hi(trap_block), %g2
+	sllx	%g1, TRAP_BLOCK_SZ_SHIFT, %g1
+	or	%g2, %lo(trap_block), %g2
+	add	%g2, %g1, %g2
+	stx	%o0, [%g2 + TRAP_PER_CPU_PGD_PADDR]
 
-	brgez	%o1, 9f
-	 nop
-
-	/* Lock TSB into D-TLB.  */
-	sethi		%hi(PAGE_SIZE), %o3
-	and		%o3, %o1, %o3
-	sethi		%hi(TSBMAP_BASE), %o2
-	add		%o2, %o3, %o2
-
-	/* XXX handle PAGE_SIZE != 8K correctly...  */
 	mov	TSB_REG, %g1
-	stxa	%o2, [%g1] ASI_DMMU
+	stxa	%o1, [%g1] ASI_DMMU
 	membar	#Sync
 
-	stxa	%o2, [%g1] ASI_IMMU
+	stxa	%o1, [%g1] ASI_IMMU
 	membar	#Sync
 
-#define KERN_HIGHBITS	((_PAGE_VALID|_PAGE_SZBITS)^0xfffff80000000000)
-#define KERN_LOWBITS	(_PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_W | _PAGE_L)
-	sethi		%uhi(KERN_HIGHBITS), %g2
-	or		%g2, %ulo(KERN_HIGHBITS), %g2
-	sllx		%g2, 32, %g2
-	or		%g2, KERN_LOWBITS, %g2
-#undef KERN_HIGHBITS
-#undef KERN_LOWBITS
-
-	xor		%o1, %g2, %o1	
+	brz	%o2, 9f
+	 nop
 
 	/* We use entry 61 for this locked entry.  This is the spitfire
 	 * TLB entry number, and luckily cheetah masks the value with
@@ -184,11 +172,10 @@
 	stxa		%o2, [%g1] ASI_DMMU
 	membar		#Sync
 	mov		(61 << 3), %g1
-	stxa		%o1, [%g1] ASI_DTLB_DATA_ACCESS
+	stxa		%o3, [%g1] ASI_DTLB_DATA_ACCESS
 	membar		#Sync
-
 9:
 	wrpr	%o5, %pstate
 
 	retl
-	 mov	%o2, %o0
+	 nop
diff --git a/arch/sparc64/mm/tsb.c b/arch/sparc64/mm/tsb.c
index 2f84cef..dfe7144 100644
--- a/arch/sparc64/mm/tsb.c
+++ b/arch/sparc64/mm/tsb.c
@@ -9,13 +9,7 @@
 #include <asm/tlbflush.h>
 #include <asm/tlb.h>
 #include <asm/mmu_context.h>
-
-#define TSB_ENTRY_ALIGNMENT	16
-
-struct tsb {
-	unsigned long tag;
-	unsigned long pte;
-} __attribute__((aligned(TSB_ENTRY_ALIGNMENT)));
+#include <asm/pgtable.h>
 
 /* We use an 8K TSB for the whole kernel, this allows to
  * handle about 4MB of modules and vmalloc mappings without
@@ -27,10 +21,10 @@
 
 extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];
 
-static inline unsigned long tsb_hash(unsigned long vaddr)
+static inline unsigned long tsb_hash(unsigned long vaddr, unsigned long nentries)
 {
 	vaddr >>= PAGE_SHIFT;
-	return vaddr & (KERNEL_TSB_NENTRIES - 1);
+	return vaddr & (nentries - 1);
 }
 
 static inline int tag_compare(struct tsb *entry, unsigned long vaddr, unsigned long context)
@@ -51,7 +45,8 @@
 	unsigned long v;
 
 	for (v = start; v < end; v += PAGE_SIZE) {
-		struct tsb *ent = &swapper_tsb[tsb_hash(v)];
+		unsigned long hash = tsb_hash(v, KERNEL_TSB_NENTRIES);
+		struct tsb *ent = &swapper_tsb[hash];
 
 		if (tag_compare(ent, v, 0)) {
 			ent->tag = 0UL;
@@ -63,8 +58,9 @@
 void flush_tsb_user(struct mmu_gather *mp)
 {
 	struct mm_struct *mm = mp->mm;
-	struct tsb *tsb = (struct tsb *) mm->context.sparc64_tsb;
+	struct tsb *tsb = mm->context.tsb;
 	unsigned long ctx = ~0UL;
+	unsigned long nentries = mm->context.tsb_nentries;
 	int i;
 
 	if (CTX_VALID(mm->context))
@@ -76,7 +72,7 @@
 
 		v &= ~0x1UL;
 
-		ent = &tsb[tsb_hash(v)];
+		ent = &tsb[tsb_hash(v, nentries)];
 		if (tag_compare(ent, v, ctx)) {
 			ent->tag = 0UL;
 			membar_storeload_storestore();
@@ -84,6 +80,83 @@
 	}
 }
 
+static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_bytes)
+{
+	unsigned long tsb_reg, base, tsb_paddr;
+	unsigned long page_sz, tte;
+
+	mm->context.tsb_nentries = tsb_bytes / sizeof(struct tsb);
+
+	base = TSBMAP_BASE;
+	tte = (_PAGE_VALID | _PAGE_L | _PAGE_CP |
+	       _PAGE_CV    | _PAGE_P | _PAGE_W);
+	tsb_paddr = __pa(mm->context.tsb);
+
+	/* Use the smallest page size that can map the whole TSB
+	 * in one TLB entry.
+	 */
+	switch (tsb_bytes) {
+	case 8192 << 0:
+		tsb_reg = 0x0UL;
+#ifdef DCACHE_ALIASING_POSSIBLE
+		base += (tsb_paddr & 8192);
+#endif
+		tte |= _PAGE_SZ8K;
+		page_sz = 8192;
+		break;
+
+	case 8192 << 1:
+		tsb_reg = 0x1UL;
+		tte |= _PAGE_SZ64K;
+		page_sz = 64 * 1024;
+		break;
+
+	case 8192 << 2:
+		tsb_reg = 0x2UL;
+		tte |= _PAGE_SZ64K;
+		page_sz = 64 * 1024;
+		break;
+
+	case 8192 << 3:
+		tsb_reg = 0x3UL;
+		tte |= _PAGE_SZ64K;
+		page_sz = 64 * 1024;
+		break;
+
+	case 8192 << 4:
+		tsb_reg = 0x4UL;
+		tte |= _PAGE_SZ512K;
+		page_sz = 512 * 1024;
+		break;
+
+	case 8192 << 5:
+		tsb_reg = 0x5UL;
+		tte |= _PAGE_SZ512K;
+		page_sz = 512 * 1024;
+		break;
+
+	case 8192 << 6:
+		tsb_reg = 0x6UL;
+		tte |= _PAGE_SZ512K;
+		page_sz = 512 * 1024;
+		break;
+
+	case 8192 << 7:
+		tsb_reg = 0x7UL;
+		tte |= _PAGE_SZ4MB;
+		page_sz = 4 * 1024 * 1024;
+		break;
+	};
+
+	tsb_reg |= base;
+	tsb_reg |= (tsb_paddr & (page_sz - 1UL));
+	tte |= (tsb_paddr & ~(page_sz - 1UL));
+
+	mm->context.tsb_reg_val = tsb_reg;
+	mm->context.tsb_map_vaddr = base;
+	mm->context.tsb_map_pte = tte;
+}
+
 int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 {
 	unsigned long page = get_zeroed_page(GFP_KERNEL);
@@ -92,14 +165,22 @@
 	if (unlikely(!page))
 		return -ENOMEM;
 
-	mm->context.sparc64_tsb = (unsigned long *) page;
+	mm->context.tsb = (struct tsb *) page;
+	setup_tsb_params(mm, PAGE_SIZE);
 
 	return 0;
 }
 
 void destroy_context(struct mm_struct *mm)
 {
-	free_page((unsigned long) mm->context.sparc64_tsb);
+	free_page((unsigned long) mm->context.tsb);
+
+	/* We can remove these later, but for now it's useful
+	 * to catch any bogus post-destroy_context() references
+	 * to the TSB.
+	 */
+	mm->context.tsb = NULL;
+	mm->context.tsb_reg_val = 0UL;
 
 	spin_lock(&ctx_alloc_lock);
 
diff --git a/include/asm-sparc64/mmu.h b/include/asm-sparc64/mmu.h
index 36384cf..2effeba 100644
--- a/include/asm-sparc64/mmu.h
+++ b/include/asm-sparc64/mmu.h
@@ -90,9 +90,20 @@
 
 #ifndef __ASSEMBLY__
 
+#define TSB_ENTRY_ALIGNMENT	16
+
+struct tsb {
+	unsigned long tag;
+	unsigned long pte;
+} __attribute__((aligned(TSB_ENTRY_ALIGNMENT)));
+
 typedef struct {
 	unsigned long	sparc64_ctx_val;
-	unsigned long	*sparc64_tsb;
+	struct tsb	*tsb;
+	unsigned long	tsb_nentries;
+	unsigned long	tsb_reg_val;
+	unsigned long	tsb_map_vaddr;
+	unsigned long	tsb_map_pte;
 } mm_context_t;
 
 #endif /* !__ASSEMBLY__ */
diff --git a/include/asm-sparc64/mmu_context.h b/include/asm-sparc64/mmu_context.h
index 0dffb4c..0a950f1 100644
--- a/include/asm-sparc64/mmu_context.h
+++ b/include/asm-sparc64/mmu_context.h
@@ -22,7 +22,15 @@
 extern int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
 extern void destroy_context(struct mm_struct *mm);
 
-extern unsigned long tsb_context_switch(unsigned long pgd_pa, unsigned long *tsb);
+extern void __tsb_context_switch(unsigned long pgd_pa, unsigned long tsb_reg,
+				 unsigned long tsb_vaddr, unsigned long tsb_pte);
+
+static inline void tsb_context_switch(struct mm_struct *mm)
+{
+	__tsb_context_switch(__pa(mm->pgd), mm->context.tsb_reg_val,
+			     mm->context.tsb_map_vaddr,
+			     mm->context.tsb_map_pte);
+}
 
 /* Set MMU context in the actual hardware. */
 #define load_secondary_context(__mm) \
@@ -52,8 +60,7 @@
 
 	if (!ctx_valid || (old_mm != mm)) {
 		load_secondary_context(mm);
-		tsb_context_switch(__pa(mm->pgd),
-				   mm->context.sparc64_tsb);
+		tsb_context_switch(mm);
 	}
 
 	/* Even if (mm == old_mm) we _must_ check
@@ -91,7 +98,7 @@
 
 	load_secondary_context(mm);
 	__flush_tlb_mm(CTX_HWBITS(mm->context), SECONDARY_CONTEXT);
-	tsb_context_switch(__pa(mm->pgd), mm->context.sparc64_tsb);
+	tsb_context_switch(mm);
 }
 
 #endif /* !(__ASSEMBLY__) */
diff --git a/include/asm-sparc64/tsb.h b/include/asm-sparc64/tsb.h
index 03d272e..1f93b7d 100644
--- a/include/asm-sparc64/tsb.h
+++ b/include/asm-sparc64/tsb.h
@@ -19,7 +19,7 @@
  * 	stxa		%g5, [%g0] ASI_{D,I}TLB_DATA_IN
  * 	retry
  *
-
+ *
  * Each 16-byte slot of the TSB is the 8-byte tag and then the 8-byte
  * PTE.  The TAG is of the same layout as the TLB TAG TARGET mmu
  * register which is: