Relaxed memory barriers for x86

X86 provides stronger memory ordering guarantees than the generic code assumes,
so its memory barriers can be relaxed. This patch treats every memory barrier
on x86 as a scheduling barrier, and emits an mfence only for the one case that
needs a hardware fence (StoreLoad).
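
As an illustration only (not part of this patch), the lowering decision
reduces to the standalone sketch below; LowerX86Barrier and the locally
declared enum are made-up names for the example, and only the StoreLoad case
mirrors what GenMemBarrier does in the diff:

    #include <cstdio>

    // Illustrative barrier kinds; the real MemBarrierKind enum lives in the compiler.
    enum MemBarrierKind { kLoadLoad, kLoadStore, kStoreStore, kStoreLoad };

    // On x86 only a StoreLoad barrier needs a real fence, and even that can be
    // skipped when the previous instruction already acts as a full barrier
    // (lock cmpxchg, xchg, mfence). Everything else needs only a scheduling barrier.
    const char* LowerX86Barrier(MemBarrierKind kind, bool prev_is_full_barrier) {
      if (kind == kStoreLoad && !prev_is_full_barrier) {
        return "mfence";
      }
      return nullptr;  // No instruction emitted; a scheduling barrier suffices.
    }

    int main() {
      std::printf("StoreLoad after a plain store -> %s\n",
                  LowerX86Barrier(kStoreLoad, false));                     // mfence
      std::printf("StoreLoad after lock cmpxchg  -> %s\n",
                  LowerX86Barrier(kStoreLoad, true) ? "mfence" : "(none)");
      std::printf("StoreStore                    -> %s\n",
                  LowerX86Barrier(kStoreStore, false) ? "mfence" : "(none)");
      return 0;
    }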

Change-Id: I13d02bf3f152083ba9f358052aedb583b0d48640
Signed-off-by: Razvan A Lupusoru <razvan.a.lupusoru@intel.com>
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 9994927..9588b17 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -425,10 +425,54 @@
   FreeTemp(rX86_ARG3);
 }
 
+bool X86Mir2Lir::ProvidesFullMemoryBarrier(X86OpCode opcode) {
+  switch (opcode) {
+    case kX86LockCmpxchgMR:
+    case kX86LockCmpxchgAR:
+    case kX86LockCmpxchg8bM:
+    case kX86LockCmpxchg8bA:
+    case kX86XchgMR:
+    case kX86Mfence:
+      // These atomic memory instructions already provide a full barrier.
+      return true;
+    default:
+      break;
+  }
+
+  // Be conservative: if we cannot prove a full barrier, assume there is none.
+  return false;
+}
+
 void X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
 #if ANDROID_SMP != 0
-  // TODO: optimize fences
-  NewLIR0(kX86Mfence);
+  // Start by using the last LIR as the barrier; if it is not sufficient, we will update it below.
+  LIR* mem_barrier = last_lir_insn_;
+
+  /*
+   * According to the JSR-133 Cookbook, on x86 only StoreLoad barriers need a memory fence. All other barriers
+   * (LoadLoad, LoadStore, StoreStore) are nops due to the x86 memory model. For those cases, all we need
+   * to ensure is that there is a scheduling barrier in place.
+   */
+  if (barrier_kind == kStoreLoad) {
+    // If no LIR already exists that can serve as a barrier, generate an mfence.
+    if (mem_barrier == nullptr) {
+      mem_barrier = NewLIR0(kX86Mfence);
+    }
+
+    // If the last instruction does not provide a full barrier, insert an mfence.
+    if (!ProvidesFullMemoryBarrier(static_cast<X86OpCode>(mem_barrier->opcode))) {
+      mem_barrier = NewLIR0(kX86Mfence);
+    }
+  }
+
+  // Now ensure that a scheduling barrier is in place.
+  if (mem_barrier == nullptr) {
+    GenBarrier();
+  } else {
+    // Mark as a scheduling barrier.
+    DCHECK(!mem_barrier->flags.use_def_invalid);
+    mem_barrier->u.m.def_mask = ENCODE_ALL;
+  }
 #endif
 }