[SPARC64]: Sun4v cross-call sending support.

Technically the hypervisor call supports sending in a list
of all cpus to get the cross-call, but I only pass in one
cpu at a time for now.

The multi-cpu support is there, just ifdef'd out, so it is easy to
either enable or delete it later.

Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index 223cc6b..c10a3a8 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -531,10 +531,133 @@
 	}
 }
 
+#if 0
+/* Multi-cpu list version.  */
+static int init_cpu_list(u16 *list, cpumask_t mask)
+{
+	int i, cnt;
+
+	cnt = 0;
+	for_each_cpu_mask(i, mask)
+		list[cnt++] = i;
+
+	return cnt;
+}
+
+static int update_cpu_list(u16 *list, int orig_cnt, cpumask_t mask)
+{
+	int i;
+
+	for (i = 0; i < orig_cnt; i++) {
+		if (list[i] == 0xffff)
+			cpu_clear(i, mask);
+	}
+
+	return init_cpu_list(list, mask);
+}
+
 static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
 {
-	/* XXX implement me */
+	int this_cpu = get_cpu();
+	struct trap_per_cpu *tb = &trap_block[this_cpu];
+	u64 *mondo = __va(tb->cpu_mondo_block_pa);
+	u16 *cpu_list = __va(tb->cpu_list_pa);
+	int cnt, retries;
+
+	mondo[0] = data0;
+	mondo[1] = data1;
+	mondo[2] = data2;
+	wmb();
+
+	retries = 0;
+	cnt = init_cpu_list(cpu_list, mask);
+	do {
+		register unsigned long func __asm__("%o0");
+		register unsigned long arg0 __asm__("%o1");
+		register unsigned long arg1 __asm__("%o2");
+		register unsigned long arg2 __asm__("%o3");
+
+		func = HV_FAST_CPU_MONDO_SEND;
+		arg0 = cnt;
+		arg1 = tb->cpu_list_pa;
+		arg2 = tb->cpu_mondo_block_pa;
+
+		__asm__ __volatile__("ta	%8"
+				     : "=&r" (func), "=&r" (arg0),
+				       "=&r" (arg1), "=&r" (arg2)
+				     : "0" (func), "1" (arg0),
+				       "2" (arg1), "3" (arg2),
+				       "i" (HV_FAST_TRAP)
+				     : "memory");
+		if (likely(func == HV_EOK))
+			break;
+
+		if (unlikely(++retries > 100)) {
+			printk("CPU[%d]: sun4v mondo error %lu\n",
+			       this_cpu, func);
+			break;
+		}
+
+		cnt = update_cpu_list(cpu_list, cnt, mask);
+
+		udelay(2 * cnt);
+	} while (1);
+
+	put_cpu();
 }
+#else
+/* Single-cpu list version.  */
+static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
+{
+	int this_cpu = get_cpu();
+	struct trap_per_cpu *tb = &trap_block[this_cpu];
+	u64 *mondo = __va(tb->cpu_mondo_block_pa);
+	u16 *cpu_list = __va(tb->cpu_list_pa);
+	int i;
+
+	mondo[0] = data0;
+	mondo[1] = data1;
+	mondo[2] = data2;
+	wmb();
+
+	for_each_cpu_mask(i, mask) {
+		int retries = 0;
+
+		do {
+			register unsigned long func __asm__("%o0");
+			register unsigned long arg0 __asm__("%o1");
+			register unsigned long arg1 __asm__("%o2");
+			register unsigned long arg2 __asm__("%o3");
+
+			cpu_list[0] = i;
+			func = HV_FAST_CPU_MONDO_SEND;
+			arg0 = 1;
+			arg1 = tb->cpu_list_pa;
+			arg2 = tb->cpu_mondo_block_pa;
+
+			__asm__ __volatile__("ta	%8"
+					     : "=&r" (func), "=&r" (arg0),
+					       "=&r" (arg1), "=&r" (arg2)
+					     : "0" (func), "1" (arg0),
+					       "2" (arg1), "3" (arg2),
+					       "i" (HV_FAST_TRAP)
+					     : "memory");
+			if (likely(func == HV_EOK))
+				break;
+
+			if (unlikely(++retries > 100)) {
+				printk("CPU[%d]: sun4v mondo error %lu\n",
+				       this_cpu, func);
+				break;
+			}
+
+			udelay(2 * i);
+		} while (1);
+	}
+
+	put_cpu();
+}
+#endif
 
 /* Send cross call to all processors mentioned in MASK
  * except self.