sparc64: Implement IRQ stacks.

Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/arch/sparc/include/asm/irq_64.h b/arch/sparc/include/asm/irq_64.h
index 3473e25..e3dd930 100644
--- a/arch/sparc/include/asm/irq_64.h
+++ b/arch/sparc/include/asm/irq_64.h
@@ -93,4 +93,8 @@
 void __trigger_all_cpu_backtrace(void);
 #define trigger_all_cpu_backtrace() __trigger_all_cpu_backtrace()
 
+extern void *hardirq_stack[NR_CPUS];
+extern void *softirq_stack[NR_CPUS];
+#define __ARCH_HAS_DO_SOFTIRQ
+
 #endif
diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c
index ba43d85..9b6689d 100644
--- a/arch/sparc64/kernel/irq.c
+++ b/arch/sparc64/kernel/irq.c
@@ -682,10 +682,32 @@
 	       ino, virt_irq);
 }
 
+void *hardirq_stack[NR_CPUS];
+void *softirq_stack[NR_CPUS];
+
+static __attribute__((always_inline)) void *set_hardirq_stack(void)
+{
+	void *orig_sp, *sp = hardirq_stack[smp_processor_id()];
+
+	__asm__ __volatile__("mov %%sp, %0" : "=r" (orig_sp));
+	if (orig_sp < sp ||
+	    orig_sp > (sp + THREAD_SIZE)) {
+		sp += THREAD_SIZE - 192 - STACK_BIAS;
+		__asm__ __volatile__("mov %0, %%sp" : : "r" (sp));
+	}
+
+	return orig_sp;
+}
+static __attribute__((always_inline)) void restore_hardirq_stack(void *orig_sp)
+{
+	__asm__ __volatile__("mov %0, %%sp" : : "r" (orig_sp));
+}
+
 void handler_irq(int irq, struct pt_regs *regs)
 {
 	unsigned long pstate, bucket_pa;
 	struct pt_regs *old_regs;
+	void *orig_sp;
 
 	clear_softint(1 << irq);
 
@@ -703,6 +725,8 @@
 			       "i" (PSTATE_IE)
 			     : "memory");
 
+	orig_sp = set_hardirq_stack();
+
 	while (bucket_pa) {
 		struct irq_desc *desc;
 		unsigned long next_pa;
@@ -719,10 +743,38 @@
 		bucket_pa = next_pa;
 	}
 
+	restore_hardirq_stack(orig_sp);
+
 	irq_exit();
 	set_irq_regs(old_regs);
 }
 
+void do_softirq(void)
+{
+	unsigned long flags;
+
+	if (in_interrupt())
+		return;
+
+	local_irq_save(flags);
+
+	if (local_softirq_pending()) {
+		void *orig_sp, *sp = softirq_stack[smp_processor_id()];
+
+		sp += THREAD_SIZE - 192 - STACK_BIAS;
+
+		__asm__ __volatile__("mov %%sp, %0\n\t"
+				     "mov %1, %%sp"
+				     : "=&r" (orig_sp)
+				     : "r" (sp));
+		__do_softirq();
+		__asm__ __volatile__("mov %0, %%sp"
+				     : : "r" (orig_sp));
+	}
+
+	local_irq_restore(flags);
+}
+
 #ifdef CONFIG_HOTPLUG_CPU
 void fixup_irqs(void)
 {
diff --git a/arch/sparc64/kernel/kstack.h b/arch/sparc64/kernel/kstack.h
new file mode 100644
index 0000000..43909d5
--- /dev/null
+++ b/arch/sparc64/kernel/kstack.h
@@ -0,0 +1,58 @@
+#ifndef _KSTACK_H
+#define _KSTACK_H
+
+#include <linux/thread_info.h>
+#include <linux/sched.h>
+#include <asm/ptrace.h>
+#include <asm/irq.h>
+
+/* SP must be STACK_BIAS adjusted already.  */
+static inline bool kstack_valid(struct thread_info *tp, unsigned long sp)
+{
+	unsigned long base = (unsigned long) tp;
+
+	if (sp >= (base + sizeof(struct thread_info)) &&
+	    sp <= (base + THREAD_SIZE - sizeof(struct sparc_stackf)))
+		return true;
+
+	base = (unsigned long) hardirq_stack[tp->cpu];
+	if (sp >= base &&
+	    sp <= (base + THREAD_SIZE - sizeof(struct sparc_stackf)))
+		return true;
+	base = (unsigned long) softirq_stack[tp->cpu];
+	if (sp >= base &&
+	    sp <= (base + THREAD_SIZE - sizeof(struct sparc_stackf)))
+		return true;
+
+	return false;
+}
+
+/* Does "regs" point to a valid pt_regs trap frame?  */
+static inline bool kstack_is_trap_frame(struct thread_info *tp, struct pt_regs *regs)
+{
+	unsigned long base = (unsigned long) tp;
+	unsigned long addr = (unsigned long) regs;
+
+	if (addr >= base &&
+	    addr <= (base + THREAD_SIZE - sizeof(*regs)))
+		goto check_magic;
+
+	base = (unsigned long) hardirq_stack[tp->cpu];
+	if (addr >= base &&
+	    addr <= (base + THREAD_SIZE - sizeof(*regs)))
+		goto check_magic;
+	base = (unsigned long) softirq_stack[tp->cpu];
+	if (addr >= base &&
+	    addr <= (base + THREAD_SIZE - sizeof(*regs)))
+		goto check_magic;
+
+	return false;
+
+check_magic:
+	if ((regs->magic & ~0x1ff) == PT_REGS_MAGIC)
+		return true;
+	return false;
+
+}
+
+#endif /* _KSTACK_H */
diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c
index 7f5debd..15f4178 100644
--- a/arch/sparc64/kernel/process.c
+++ b/arch/sparc64/kernel/process.c
@@ -52,6 +52,8 @@
 #include <asm/irq_regs.h>
 #include <asm/smp.h>
 
+#include "kstack.h"
+
 static void sparc64_yield(int cpu)
 {
 	if (tlb_type != hypervisor)
@@ -235,19 +237,6 @@
 struct global_reg_snapshot global_reg_snapshot[NR_CPUS];
 static DEFINE_SPINLOCK(global_reg_snapshot_lock);
 
-static bool kstack_valid(struct thread_info *tp, struct reg_window *rw)
-{
-	unsigned long thread_base, fp;
-
-	thread_base = (unsigned long) tp;
-	fp = (unsigned long) rw;
-
-	if (fp < (thread_base + sizeof(struct thread_info)) ||
-	    fp >= (thread_base + THREAD_SIZE))
-		return false;
-	return true;
-}
-
 static void __global_reg_self(struct thread_info *tp, struct pt_regs *regs,
 			      int this_cpu)
 {
@@ -264,11 +253,11 @@
 
 		rw = (struct reg_window *)
 			(regs->u_regs[UREG_FP] + STACK_BIAS);
-		if (kstack_valid(tp, rw)) {
+		if (kstack_valid(tp, (unsigned long) rw)) {
 			global_reg_snapshot[this_cpu].i7 = rw->ins[7];
 			rw = (struct reg_window *)
 				(rw->ins[6] + STACK_BIAS);
-			if (kstack_valid(tp, rw))
+			if (kstack_valid(tp, (unsigned long) rw))
 				global_reg_snapshot[this_cpu].rpc = rw->ins[7];
 		}
 	} else {
@@ -828,7 +817,7 @@
 unsigned long get_wchan(struct task_struct *task)
 {
 	unsigned long pc, fp, bias = 0;
-	unsigned long thread_info_base;
+	struct thread_info *tp;
 	struct reg_window *rw;
         unsigned long ret = 0;
 	int count = 0; 
@@ -837,14 +826,12 @@
             task->state == TASK_RUNNING)
 		goto out;
 
-	thread_info_base = (unsigned long) task_stack_page(task);
+	tp = task_thread_info(task);
 	bias = STACK_BIAS;
 	fp = task_thread_info(task)->ksp + bias;
 
 	do {
-		/* Bogus frame pointer? */
-		if (fp < (thread_info_base + sizeof(struct thread_info)) ||
-		    fp >= (thread_info_base + THREAD_SIZE))
+		if (!kstack_valid(tp, fp))
 			break;
 		rw = (struct reg_window *) fp;
 		pc = rw->ins[7];
diff --git a/arch/sparc64/kernel/stacktrace.c b/arch/sparc64/kernel/stacktrace.c
index e9d7f06..237e7f8 100644
--- a/arch/sparc64/kernel/stacktrace.c
+++ b/arch/sparc64/kernel/stacktrace.c
@@ -5,6 +5,8 @@
 #include <asm/ptrace.h>
 #include <asm/stacktrace.h>
 
+#include "kstack.h"
+
 void save_stack_trace(struct stack_trace *trace)
 {
 	unsigned long ksp, fp, thread_base;
@@ -24,17 +26,13 @@
 		struct pt_regs *regs;
 		unsigned long pc;
 
-		/* Bogus frame pointer? */
-		if (fp < (thread_base + sizeof(struct thread_info)) ||
-		    fp > (thread_base + THREAD_SIZE - sizeof(struct sparc_stackf)))
+		if (!kstack_valid(tp, fp))
 			break;
 
 		sf = (struct sparc_stackf *) fp;
 		regs = (struct pt_regs *) (sf + 1);
 
-		if (((unsigned long)regs <=
-		     (thread_base + THREAD_SIZE - sizeof(*regs))) &&
-		    (regs->magic & ~0x1ff) == PT_REGS_MAGIC) {
+		if (kstack_is_trap_frame(tp, regs)) {
 			if (!(regs->tstate & TSTATE_PRIV))
 				break;
 			pc = regs->tpc;
diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c
index 404e856..3d92412 100644
--- a/arch/sparc64/kernel/traps.c
+++ b/arch/sparc64/kernel/traps.c
@@ -39,6 +39,7 @@
 #include <asm/prom.h>
 
 #include "entry.h"
+#include "kstack.h"
 
 /* When an irrecoverable trap occurs at tl > 0, the trap entry
  * code logs the trap state registers at every level in the trap
@@ -2115,14 +2116,12 @@
 		struct pt_regs *regs;
 		unsigned long pc;
 
-		/* Bogus frame pointer? */
-		if (fp < (thread_base + sizeof(struct thread_info)) ||
-		    fp >= (thread_base + THREAD_SIZE))
+		if (!kstack_valid(tp, fp))
 			break;
 		sf = (struct sparc_stackf *) fp;
 		regs = (struct pt_regs *) (sf + 1);
 
-		if ((regs->magic & ~0x1ff) == PT_REGS_MAGIC) {
+		if (kstack_is_trap_frame(tp, regs)) {
 			if (!(regs->tstate & TSTATE_PRIV))
 				break;
 			pc = regs->tpc;
diff --git a/arch/sparc64/lib/mcount.S b/arch/sparc64/lib/mcount.S
index 734caf0..fad90dd 100644
--- a/arch/sparc64/lib/mcount.S
+++ b/arch/sparc64/lib/mcount.S
@@ -49,6 +49,28 @@
 	cmp		%sp, %g3
 	bg,pt		%xcc, 1f
 	 nop
+	lduh		[%g6 + TI_CPU], %g1
+	sethi		%hi(hardirq_stack), %g3
+	or		%g3, %lo(hardirq_stack), %g3
+	sllx		%g1, 3, %g1
+	ldx		[%g3 + %g1], %g7
+	sub		%g7, STACK_BIAS, %g7
+	cmp		%sp, %g7
+	bleu,pt		%xcc, 2f
+	 sethi		%hi(THREAD_SIZE), %g3
+	add		%g7, %g3, %g7
+	cmp		%sp, %g7
+	blu,pn		%xcc, 1f
+2:	 sethi		%hi(softirq_stack), %g3
+	or		%g3, %lo(softirq_stack), %g3
+	ldx		[%g3 + %g1], %g7
+	cmp		%sp, %g7
+	bleu,pt		%xcc, 2f
+	 sethi		%hi(THREAD_SIZE), %g3
+	add		%g7, %g3, %g7
+	cmp		%sp, %g7
+	blu,pn		%xcc, 1f
+	 nop
 	/* If we are already on ovstack, don't hop onto it
 	 * again, we are already trying to output the stack overflow
 	 * message.
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index 4e821b3..217de3e 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -49,6 +49,7 @@
 #include <asm/sstate.h>
 #include <asm/mdesc.h>
 #include <asm/cpudata.h>
+#include <asm/irq.h>
 
 #define MAX_PHYS_ADDRESS	(1UL << 42UL)
 #define KPTE_BITMAP_CHUNK_SZ	(256UL * 1024UL * 1024UL)
@@ -1771,6 +1772,16 @@
 	if (tlb_type == hypervisor)
 		sun4v_mdesc_init();
 
+	/* Once the OF device tree and MDESC have been setup, we know
+	 * the list of possible cpus.  Therefore we can allocate the
+	 * IRQ stacks.
+	 */
+	for_each_possible_cpu(i) {
+		/* XXX Use node local allocations... XXX */
+		softirq_stack[i] = __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE));
+		hardirq_stack[i] = __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE));
+	}
+
 	/* Setup bootmem... */
 	last_valid_pfn = end_pfn = bootmem_init(phys_base);