sh: Bring SMP support back from the dead.

There was a very preliminary bunch of SMP code scattered around for the
SH7604 microcontrollers from way back when, and it has mostly suffered
from bitrot since then. Now that the tree has gradually been prepped for
SMP, this plugs in most of the remaining platform-independent bits.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
diff --git a/arch/sh/kernel/cpu/init.c b/arch/sh/kernel/cpu/init.c
index fdc245b..c217c4b 100644
--- a/arch/sh/kernel/cpu/init.c
+++ b/arch/sh/kernel/cpu/init.c
@@ -22,6 +22,7 @@
 #include <asm/cache.h>
 #include <asm/io.h>
 #include <asm/ubc.h>
+#include <asm/smp.h>
 
 /*
  * Generic wrapper for command line arguments to disable on-chip
@@ -216,8 +217,11 @@
  * Each processor family is still responsible for doing its own probing
  * and cache configuration in detect_cpu_and_cache_system().
  */
-asmlinkage void __init sh_cpu_init(void)
+
+asmlinkage void __cpuinit sh_cpu_init(void)
 {
+	current_thread_info()->cpu = hard_smp_processor_id();
+
 	/* First, probe the CPU */
 	detect_cpu_and_cache_system();
 
@@ -227,9 +231,10 @@
 	/* Init the cache */
 	cache_init();
 
-	shm_align_mask = max_t(unsigned long,
-			       current_cpu_data.dcache.way_size - 1,
-			       PAGE_SIZE - 1);
+	if (raw_smp_processor_id() == 0)
+		shm_align_mask = max_t(unsigned long,
+				       current_cpu_data.dcache.way_size - 1,
+				       PAGE_SIZE - 1);
 
 	/* Disable the FPU */
 	if (fpu_disabled) {
@@ -268,6 +273,7 @@
 	 * like PTRACE_SINGLESTEP or doing hardware watchpoints in GDB.  So ..
 	 * we wake it up and hope that all is well.
 	 */
-	ubc_wakeup();
+	if (raw_smp_processor_id() == 0)
+		ubc_wakeup();
 	speculative_execution_init();
 }
diff --git a/arch/sh/kernel/head.S b/arch/sh/kernel/head.S
index 0bccc0c..3338239 100644
--- a/arch/sh/kernel/head.S
+++ b/arch/sh/kernel/head.S
@@ -54,8 +54,8 @@
 	mov.l	1f, r0		! MD=1, RB=0, BL=0, IMASK=0xF
 	ldc	r0, sr
 	!			Initialize global interrupt mask
-	mov	#0, r0
 #ifdef CONFIG_CPU_HAS_SR_RB
+	mov	#0, r0
 	ldc	r0, r6_bank
 #endif
 	
@@ -72,15 +72,18 @@
 	!
 	mov.l	2f, r0
 	mov	r0, r15		! Set initial r15 (stack pointer)
-	mov	#(THREAD_SIZE >> 10), r1
-	shll8	r1		! r1 = THREAD_SIZE
-	shll2	r1
-	sub	r1, r0		!
 #ifdef CONFIG_CPU_HAS_SR_RB
+	mov.l	7f, r0
 	ldc	r0, r7_bank	! ... and initial thread_info
 #endif
 	
 	!			Clear BSS area
+#ifdef CONFIG_SMP	
+	mov.l	3f, r0
+	cmp/eq	#0, r0		! skip clear if set to zero
+	bt	10f
+#endif
+	
 	mov.l	3f, r1
 	add	#4, r1
 	mov.l	4f, r2
@@ -89,13 +92,14 @@
 	bf/s	9b		! while (r1 < r2)
 	 mov.l	r0,@-r2
 
+10:		
 	!			Additional CPU initialization
 	mov.l	6f, r0
 	jsr	@r0
 	 nop
 
 	SYNCO()			! Wait for pending instructions..
-
+	
 	!			Start kernel
 	mov.l	5f, r0
 	jmp	@r0
@@ -107,8 +111,10 @@
 #else
 1:	.long	0x400080F0		! MD=1, RB=0, BL=0, FD=1, IMASK=0xF
 #endif
+ENTRY(stack_start)
 2:	.long	init_thread_union+THREAD_SIZE
 3:	.long	__bss_start
 4:	.long	_end
 5:	.long	start_kernel
 6:	.long	sh_cpu_init
+7:	.long	init_thread_union
diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c
index f93d5ff..94075e1 100644
--- a/arch/sh/kernel/smp.c
+++ b/arch/sh/kernel/smp.c
@@ -3,69 +3,41 @@
  *
  * SMP support for the SuperH processors.
  *
- * Copyright (C) 2002, 2003 Paul Mundt
+ * Copyright (C) 2002 - 2007 Paul Mundt
+ * Copyright (C) 2006 - 2007 Akio Idehara
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
  */
-
 #include <linux/err.h>
 #include <linux/cache.h>
 #include <linux/cpumask.h>
 #include <linux/delay.h>
 #include <linux/init.h>
-#include <linux/interrupt.h>
 #include <linux/spinlock.h>
-#include <linux/threads.h>
+#include <linux/mm.h>
 #include <linux/module.h>
-#include <linux/time.h>
-#include <linux/timex.h>
-#include <linux/sched.h>
-#include <linux/module.h>
-
+#include <linux/interrupt.h>
 #include <asm/atomic.h>
 #include <asm/processor.h>
 #include <asm/system.h>
 #include <asm/mmu_context.h>
 #include <asm/smp.h>
+#include <asm/cacheflush.h>
+#include <asm/sections.h>
 
-/*
- * This was written with the Sega Saturn (SMP SH-2 7604) in mind,
- * but is designed to be usable regardless if there's an MMU
- * present or not.
- */
-struct sh_cpuinfo cpu_data[NR_CPUS];
-
-extern void per_cpu_trap_init(void);
+int __cpu_number_map[NR_CPUS];		/* Map physical to logical */
+int __cpu_logical_map[NR_CPUS];		/* Map logical to physical */
 
 cpumask_t cpu_possible_map;
 EXPORT_SYMBOL(cpu_possible_map);
 
 cpumask_t cpu_online_map;
 EXPORT_SYMBOL(cpu_online_map);
+
 static atomic_t cpus_booted = ATOMIC_INIT(0);
 
-/* These are defined by the board-specific code. */
-
-/*
- * Cause the function described by call_data to be executed on the passed
- * cpu.  When the function has finished, increment the finished field of
- * call_data.
- */
-void __smp_send_ipi(unsigned int cpu, unsigned int action);
-
-/*
- * Find the number of available processors
- */
-unsigned int __smp_probe_cpus(void);
-
-/*
- * Start a particular processor
- */
-void __smp_slave_init(unsigned int cpu);
-
 /*
  * Run specified function on a particular processor.
  */
@@ -73,74 +45,123 @@
 
 static inline void __init smp_store_cpu_info(unsigned int cpu)
 {
-	cpu_data[cpu].loops_per_jiffy = loops_per_jiffy;
+	struct sh_cpuinfo *c = cpu_data + cpu;
+
+	c->loops_per_jiffy = loops_per_jiffy;
 }
 
 void __init smp_prepare_cpus(unsigned int max_cpus)
 {
 	unsigned int cpu = smp_processor_id();
-	int i;
 
-	atomic_set(&cpus_booted, 1);
-	smp_store_cpu_info(cpu);
-	
-	for (i = 0; i < __smp_probe_cpus(); i++)
-		cpu_set(i, cpu_possible_map);
+	init_new_context(current, &init_mm);
+	current_thread_info()->cpu = cpu;
+	plat_prepare_cpus(max_cpus);
+
+#ifndef CONFIG_HOTPLUG_CPU
+	cpu_present_map = cpu_possible_map;
+#endif
 }
 
 void __devinit smp_prepare_boot_cpu(void)
 {
 	unsigned int cpu = smp_processor_id();
 
+	__cpu_number_map[0] = cpu;
+	__cpu_logical_map[0] = cpu;
+
 	cpu_set(cpu, cpu_online_map);
 	cpu_set(cpu, cpu_possible_map);
 }
 
-int __cpu_up(unsigned int cpu)
+asmlinkage void __cpuinit start_secondary(void)
 {
-	struct task_struct *tsk;
+	unsigned int cpu;
+	struct mm_struct *mm = &init_mm;
 
-	tsk = fork_idle(cpu);
+	atomic_inc(&mm->mm_count);
+	atomic_inc(&mm->mm_users);
+	current->active_mm = mm;
+	BUG_ON(current->mm);
+	enter_lazy_tlb(mm, current);
 
-	if (IS_ERR(tsk))
-		panic("Failed forking idle task for cpu %d\n", cpu);
-	
-	task_thread_info(tsk)->cpu = cpu;
+	per_cpu_trap_init();
+
+	preempt_disable();
+
+	local_irq_enable();
+
+	calibrate_delay();
+
+	cpu = smp_processor_id();
+	smp_store_cpu_info(cpu);
 
 	cpu_set(cpu, cpu_online_map);
 
-	return 0;
+	cpu_idle();
 }
 
-int start_secondary(void *unused)
+extern struct {
+	unsigned long sp;
+	unsigned long bss_start;
+	unsigned long bss_end;
+	void *start_kernel_fn;
+	void *cpu_init_fn;
+	void *thread_info;
+} stack_start;
+
+int __cpuinit __cpu_up(unsigned int cpu)
 {
-	unsigned int cpu;
+	struct task_struct *tsk;
+	unsigned long timeout;
 
-	cpu = smp_processor_id();
+	tsk = fork_idle(cpu);
+	if (IS_ERR(tsk)) {
+		printk(KERN_ERR "Failed forking idle task for cpu %d\n", cpu);
+		return PTR_ERR(tsk);
+	}
 
-	atomic_inc(&init_mm.mm_count);
-	current->active_mm = &init_mm;
+	/* Fill in data in head.S for secondary cpus */
+	stack_start.sp = tsk->thread.sp;
+	stack_start.thread_info = tsk->stack;
+	stack_start.bss_start = 0; /* don't clear bss for secondary cpus */
+	stack_start.start_kernel_fn = start_secondary;
 
-	smp_store_cpu_info(cpu);
+	flush_cache_all();
 
-	__smp_slave_init(cpu);
-	preempt_disable();
-	per_cpu_trap_init();
-	
-	atomic_inc(&cpus_booted);
+	plat_start_cpu(cpu, (unsigned long)_stext);
 
-	cpu_idle();
-	return 0;
+	timeout = jiffies + HZ;
+	while (time_before(jiffies, timeout)) {
+		if (cpu_online(cpu))
+			break;
+
+		udelay(10);
+	}
+
+	if (cpu_online(cpu))
+		return 0;
+
+	return -ENOENT;
 }
 
 void __init smp_cpus_done(unsigned int max_cpus)
 {
-	smp_mb();
+	unsigned long bogosum = 0;
+	int cpu;
+
+	for_each_online_cpu(cpu)
+		bogosum += cpu_data[cpu].loops_per_jiffy;
+
+	printk(KERN_INFO "SMP: Total of %d processors activated "
+	       "(%lu.%02lu BogoMIPS).\n", num_online_cpus(),
+	       bogosum / (500000/HZ),
+	       (bogosum / (5000/HZ)) % 100);
 }
 
 void smp_send_reschedule(int cpu)
 {
-	__smp_send_ipi(cpu, SMP_MSG_RESCHEDULE);
+	plat_send_ipi(cpu, SMP_MSG_RESCHEDULE);
 }
 
 static void stop_this_cpu(void *unused)
@@ -157,7 +178,6 @@
 	smp_call_function(stop_this_cpu, 0, 1, 0);
 }
 
-
 struct smp_fn_call_struct smp_fn_call = {
 	.lock		= SPIN_LOCK_UNLOCKED,
 	.finished	= ATOMIC_INIT(0),
@@ -175,9 +195,6 @@
 	unsigned int nr_cpus = atomic_read(&cpus_booted);
 	int i;
 
-	if (nr_cpus < 2)
-		return 0;
-
 	/* Can deadlock when called with interrupts disabled */
 	WARN_ON(irqs_disabled());
 
@@ -189,7 +206,7 @@
 
 	for (i = 0; i < nr_cpus; i++)
 		if (i != smp_processor_id())
-			__smp_call_function(i);
+			plat_send_ipi(i, SMP_MSG_FUNCTION);
 
 	if (wait)
 		while (atomic_read(&smp_fn_call.finished) != (nr_cpus - 1));
diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c
index 6701504..dcb46e7 100644
--- a/arch/sh/kernel/traps.c
+++ b/arch/sh/kernel/traps.c
@@ -807,12 +807,13 @@
 }
 #endif
 
-void __init per_cpu_trap_init(void)
+void __cpuinit per_cpu_trap_init(void)
 {
 	extern void *vbr_base;
 
 #ifdef CONFIG_SH_STANDARD_BIOS
-	gdb_vbr_init();
+	if (raw_smp_processor_id() == 0)
+		gdb_vbr_init();
 #endif
 
 	/* NOTE: The VBR value should be at P1
diff --git a/include/asm-sh/smp.h b/include/asm-sh/smp.h
index b99ca78..9c8d34b 100644
--- a/include/asm-sh/smp.h
+++ b/include/asm-sh/smp.h
@@ -1,12 +1,3 @@
-/*
- * include/asm-sh/smp.h
- *
- * Copyright (C) 2002, 2003  Paul Mundt
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive for
- * more details.
- */
 #ifndef __ASM_SH_SMP_H
 #define __ASM_SH_SMP_H
 
@@ -20,6 +11,15 @@
 #include <asm/current.h>
 
 #define raw_smp_processor_id()	(current_thread_info()->cpu)
+#define hard_smp_processor_id()	plat_smp_processor_id()
+
+/* Map from cpu id to sequential logical cpu number. */
+extern int __cpu_number_map[NR_CPUS];
+#define cpu_number_map(cpu)  __cpu_number_map[cpu]
+
+/* The reverse map from sequential logical cpu number to cpu id.  */
+extern int __cpu_logical_map[NR_CPUS];
+#define cpu_logical_map(cpu)  __cpu_logical_map[cpu]
 
 /* I've no idea what the real meaning of this is */
 #define PROC_CHANGE_PENALTY	20
@@ -35,10 +35,22 @@
 
 extern struct smp_fn_call_struct smp_fn_call;
 
-#define SMP_MSG_RESCHEDULE	0x0001
+#define SMP_MSG_FUNCTION	0
+#define SMP_MSG_RESCHEDULE	1
+#define SMP_MSG_NR		2
 
-#endif /* CONFIG_SMP */
+void plat_smp_setup(void);
+void plat_prepare_cpus(unsigned int max_cpus);
+int plat_smp_processor_id(void);
+void plat_start_cpu(unsigned int cpu, unsigned long entry_point);
+void plat_send_ipi(unsigned int cpu, unsigned int message);
+int plat_register_ipi_handler(unsigned int message,
+			      void (*handler)(void *), void *arg);
+
+#else
 
 #define hard_smp_processor_id()	(0)
 
+#endif /* CONFIG_SMP */
+
 #endif /* __ASM_SH_SMP_H */
diff --git a/include/asm-sh/system.h b/include/asm-sh/system.h
index 2450425..9d849e6 100644
--- a/include/asm-sh/system.h
+++ b/include/asm-sh/system.h
@@ -266,6 +266,7 @@
 void enable_hlt(void);
 
 void default_idle(void);
+void per_cpu_trap_init(void);
 
 asmlinkage void break_point_trap(void);
 asmlinkage void debug_trap_handler(unsigned long r4, unsigned long r5,