x86/numachip: Add Numachip IPI optimisations

When sending IPIs, first check if the non-local part of the source and
destination APIC IDs match; if so, send via the local APIC for efficiency.

Secondly, since the AMD BIOS-kernel developer guide states IPI delivery
will occur invarient of prior deliver status, avoid polling the delivery
status bit for efficiency.

Signed-off-by: Daniel J Blueman <daniel@numascale.com>
Acked-by: Steffen Persvold <sp@numascale.com>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: http://lkml.kernel.org/r/1442768522-19217-3-git-send-email-daniel@numascale.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index 3cb9294..38dd5ef 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -96,9 +96,25 @@
 
 static void numachip_send_IPI_one(int cpu, int vector)
 {
-	int apicid = per_cpu(x86_cpu_to_apicid, cpu);
+	int local_apicid, apicid = per_cpu(x86_cpu_to_apicid, cpu);
 	unsigned int dmode;
 
+	preempt_disable();
+	local_apicid = __this_cpu_read(x86_cpu_to_apicid);
+
+	/* Send via local APIC where non-local part matches */
+	if (!((apicid ^ local_apicid) >> NUMACHIP_LAPIC_BITS)) {
+		unsigned long flags;
+
+		local_irq_save(flags);
+		__default_send_IPI_dest_field(apicid, vector,
+			APIC_DEST_PHYSICAL);
+		local_irq_restore(flags);
+		preempt_enable();
+		return;
+	}
+	preempt_enable();
+
 	dmode = (vector == NMI_VECTOR) ? APIC_DM_NMI : APIC_DM_FIXED;
 	numachip_apic_icr_write(apicid, dmode | vector);
 }
@@ -218,6 +234,17 @@
 	return 1;
 }
 
+/* APIC IPIs are queued */
+static void numachip_apic_wait_icr_idle(void)
+{
+}
+
+/* APIC NMI IPIs are queued */
+static u32 numachip_safe_apic_wait_icr_idle(void)
+{
+	return 0;
+}
+
 static const struct apic apic_numachip1 __refconst = {
 	.name				= "NumaConnect system",
 	.probe				= numachip1_probe,
@@ -263,8 +290,8 @@
 	.eoi_write			= native_apic_mem_write,
 	.icr_read			= native_apic_icr_read,
 	.icr_write			= native_apic_icr_write,
-	.wait_icr_idle			= native_apic_wait_icr_idle,
-	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
+	.wait_icr_idle			= numachip_apic_wait_icr_idle,
+	.safe_wait_icr_idle		= numachip_safe_apic_wait_icr_idle,
 };
 
 apic_driver(apic_numachip1);
@@ -314,8 +341,8 @@
 	.eoi_write			= native_apic_mem_write,
 	.icr_read			= native_apic_icr_read,
 	.icr_write			= native_apic_icr_write,
-	.wait_icr_idle			= native_apic_wait_icr_idle,
-	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
+	.wait_icr_idle			= numachip_apic_wait_icr_idle,
+	.safe_wait_icr_idle		= numachip_safe_apic_wait_icr_idle,
 };
 
 apic_driver(apic_numachip2);