x86: add x2apic_wrmsr_fence() to x2apic flush tlb paths
Impact: optimize APIC IPI related barriers
Uncached MMIO accesses for xapic are inherently serializing and hence
we don't need explicit barriers for xapic IPI paths.
x2apic MSR writes/reads don't have serializing semantics and hence need
a serializing instruction or mfence, to make all the previous memory
stores globally visible before the x2apic msr write for IPI.
Add x2apic_wrmsr_fence() to the x2apic specific IPI send paths and remove
the now unneeded smp_mb() from the generic flush tlb path.
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: "steiner@sgi.com" <steiner@sgi.com>
Cc: Nick Piggin <npiggin@suse.de>
LKML-Reference: <1237313814.27006.203.camel@localhost.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 6d5b6f0..00f5962 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -108,6 +108,16 @@
extern u64 native_apic_icr_read(void);
#ifdef CONFIG_X86_X2APIC
+/*
+ * Make previous memory stores globally visible before sending an IPI
+ * through an x2apic wrmsr. Unlike uncached xapic MMIO accesses, the MSR
+ * write is not serializing, so an explicit mfence is issued here.
+ */
+static inline void x2apic_wrmsr_fence(void)
+{
+ asm volatile("mfence" : : : "memory");
+}
+
static inline void native_apic_msr_write(u32 reg, u32 v)
{
if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR ||
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 8fb87b6..4a903e2 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -57,6 +57,8 @@
unsigned long query_cpu;
unsigned long flags;
+ x2apic_wrmsr_fence();
+
local_irq_save(flags);
for_each_cpu(query_cpu, mask) {
__x2apic_send_IPI_dest(
@@ -73,6 +75,8 @@
unsigned long query_cpu;
unsigned long flags;
+ x2apic_wrmsr_fence();
+
local_irq_save(flags);
for_each_cpu(query_cpu, mask) {
if (query_cpu == this_cpu)
@@ -90,6 +94,8 @@
unsigned long query_cpu;
unsigned long flags;
+ x2apic_wrmsr_fence();
+
local_irq_save(flags);
for_each_online_cpu(query_cpu) {
if (query_cpu == this_cpu)
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 23625b9..a284359 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -58,6 +58,8 @@
unsigned long query_cpu;
unsigned long flags;
+ x2apic_wrmsr_fence();
+
local_irq_save(flags);
for_each_cpu(query_cpu, mask) {
__x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
@@ -73,6 +75,8 @@
unsigned long query_cpu;
unsigned long flags;
+ x2apic_wrmsr_fence();
+
local_irq_save(flags);
for_each_cpu(query_cpu, mask) {
if (query_cpu != this_cpu)
@@ -89,6 +93,8 @@
unsigned long query_cpu;
unsigned long flags;
+ x2apic_wrmsr_fence();
+
local_irq_save(flags);
for_each_online_cpu(query_cpu) {
if (query_cpu == this_cpu)
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index a654d59..821e970 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -187,11 +187,6 @@
cpumask, cpumask_of(smp_processor_id()));
/*
- * Make the above memory operations globally visible before
- * sending the IPI.
- */
- smp_mb();
- /*
* We have to send the IPI only to
* CPUs affected.
*/