New approach to r136737: insert the necessary fences for atomic ops in platform-independent code, since a bunch of platforms (ARM, Mips, PPC, Alpha are the relevant targets here) need to do essentially the same thing.

I think this completes the basic CodeGen for atomicrmw and cmpxchg.

llvm-svn: 136813
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 449f87e..bedc310 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3237,22 +3237,59 @@
   DAG.setRoot(StoreNode);
 }
 
+/// Emit the ATOMIC_FENCE, if any, that Order requires on one side of an atomic
+/// operation: ahead of it when Before is true, otherwise after it.  Returns
+/// the new chain, or Chain unchanged when that side needs no fence.
+static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order,
+                                    bool Before, DebugLoc dl, SelectionDAG &DAG,
+                                    const TargetLowering &TLI) {
+  if (Before) {
+    if (Order == AcquireRelease)
+      Order = Release;
+    else if (Order == Acquire || Order == Monotonic)
+      return Chain;
+  } else {
+    if (Order == AcquireRelease)
+      Order = Acquire;
+    else if (Order == Release || Order == Monotonic)
+      return Chain;
+  }
+  // ATOMIC_FENCE operands are (chain, ordering, synchronization scope).
+  SDValue Ops[3] = { Chain, DAG.getConstant(Order, TLI.getPointerTy()),
+                     DAG.getConstant(CrossThread, TLI.getPointerTy()) };
+  return DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops, 3);
+}
+
 void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
-  SDValue Root = getRoot();
+  // Lower cmpxchg to ATOMIC_CMP_SWAP.  When the target asks for explicit
+  // fences they enforce Order, so the node itself is emitted as Monotonic.
+  DebugLoc dl = getCurDebugLoc();
+  AtomicOrdering Order = I.getOrdering();
+  SDValue InChain = getRoot();
+
+  if (TLI.getInsertFencesForAtomic())
+    InChain = InsertFenceForAtomic(InChain, Order, true, dl, DAG, TLI);
+
   SDValue L =
-    DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, getCurDebugLoc(),
+    DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl,
                   getValue(I.getCompareOperand()).getValueType().getSimpleVT(),
-                  Root,
+                  InChain,
                   getValue(I.getPointerOperand()),
                   getValue(I.getCompareOperand()),
                   getValue(I.getNewValOperand()),
                   MachinePointerInfo(I.getPointerOperand()), 0 /* Alignment */,
-                  I.getOrdering(), I.getSynchScope());
+                  TLI.getInsertFencesForAtomic() ? Monotonic : Order,
+                  I.getSynchScope());
+
+  SDValue OutChain = L.getValue(1);
+  if (TLI.getInsertFencesForAtomic())
+    OutChain = InsertFenceForAtomic(OutChain, Order, false, dl, DAG, TLI);
+
   setValue(&I, L);
-  DAG.setRoot(L.getValue(1));
+  DAG.setRoot(OutChain);
 }
 
 void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
+  DebugLoc dl = getCurDebugLoc();  // shared by the node and its fences
   ISD::NodeType NT;
   switch (I.getOperation()) {
   default: llvm_unreachable("Unknown atomicrmw operation"); return;
@@ -3268,16 +3305,30 @@
   case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break;
   case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break;
   }
+  AtomicOrdering Order = I.getOrdering();
+  // Build the NT node; targets that ask for it get ATOMIC_FENCEs around it.
+  SDValue InChain = getRoot();
+
+  if (TLI.getInsertFencesForAtomic())
+    InChain = InsertFenceForAtomic(InChain, Order, true, dl, DAG, TLI);
+  // With fences inserted, the node itself only carries Monotonic ordering.
   SDValue L =
-    DAG.getAtomic(NT, getCurDebugLoc(),
+    DAG.getAtomic(NT, dl,
                   getValue(I.getValOperand()).getValueType().getSimpleVT(),
-                  getRoot(),
+                  InChain,
                   getValue(I.getPointerOperand()),
                   getValue(I.getValOperand()),
                   I.getPointerOperand(), 0 /* Alignment */,
-                  I.getOrdering(), I.getSynchScope());
+                  TLI.getInsertFencesForAtomic() ? Monotonic : Order,
+                  I.getSynchScope());
+
+  SDValue OutChain = L.getValue(1);
+  // The trailing fence hangs off the atomic node's output chain.
+  if (TLI.getInsertFencesForAtomic())
+    OutChain = InsertFenceForAtomic(OutChain, Order, false, dl, DAG, TLI);
+
   setValue(&I, L);
-  DAG.setRoot(L.getValue(1));
+  DAG.setRoot(OutChain);
 }
 
 void SelectionDAGBuilder::visitFence(const FenceInst &I) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 335eca7..49b0f1b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -617,6 +617,7 @@
   PrefLoopAlignment = 0;
   MinStackArgumentAlignment = 1;
   ShouldFoldAtomicFences = false;
+  InsertFencesForAtomic = false;
 
   InitLibcallNames(LibcallRoutineNames);
   InitCmpLibcallCCs(CmpLibcallCCs);
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index ed5e3ab..60de3e9 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -602,18 +602,8 @@
     // normally.
     setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
     setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
-    setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i32, Custom);
-    setOperationAction(ISD::ATOMIC_SWAP,      MVT::i32, Custom);
-    setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i32, Custom);
-    setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i32, Custom);
-    setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i32, Custom);
-    setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i32, Custom);
-    setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i32, Custom);
-    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom);
-    setOperationAction(ISD::ATOMIC_LOAD_MIN,  MVT::i32, Custom);
-    setOperationAction(ISD::ATOMIC_LOAD_MAX,  MVT::i32, Custom);
-    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
-    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);
+    // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
+    setInsertFencesForAtomic(true);
   } else {
     // Set them all for expansion, which will force libcalls.
     setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
@@ -2258,72 +2248,25 @@
                      DAG.getConstant(DMBOpt, MVT::i32));
 }
 
-static SDValue getFence(SDValue InChain, DebugLoc dl, SelectionDAG &DAG,
-                        const ARMSubtarget *Subtarget) {
+/// Lower ATOMIC_FENCE to an ARM barrier node; only the chain operand is used.
+static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
+                                 const ARMSubtarget *Subtarget) {
+  // FIXME: handle "fence singlethread" more efficiently.
+  DebugLoc dl = Op.getDebugLoc();
   if (!Subtarget->hasDataBarrier()) {
     // Some ARMv6 cpus can support data barriers with an mcr instruction.
     // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
     // here.
     assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
            "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
-    return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, InChain,
+    return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
                        DAG.getConstant(0, MVT::i32));
   }
 
-  return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, InChain,
+  return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
                      DAG.getConstant(ARM_MB::ISH, MVT::i32));
 }
 
-static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
-                                 const ARMSubtarget *Subtarget) {
-  // FIXME: handle "fence singlethread" more efficiently.
-  DebugLoc dl = Op.getDebugLoc();
-  return getFence(Op.getOperand(0), dl, DAG, Subtarget);
-}
-
-static SDValue LowerAtomicMemOp(SDValue Op, SelectionDAG &DAG,
-                                const ARMSubtarget *Subtarget) {
-  DebugLoc dl = Op.getDebugLoc();
-  int Order = cast<AtomicSDNode>(Op)->getOrdering();
-  if (Order <= Monotonic)
-    return Op;
-
-  SDValue InChain = Op.getOperand(0);
-
-  // Fence, if necessary
-  if (Order == Release || Order >= AcquireRelease)
-    InChain = getFence(InChain, dl, DAG, Subtarget);
-
-  // Rather than mess with target-specific nodes, use the target-indepedent
-  // node, and assume the DAGCombiner will not touch it post-legalize. 
-  SDValue OutVal;
-  if (Op.getOpcode() == ISD::ATOMIC_CMP_SWAP)
-    OutVal = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl,
-                           cast<AtomicSDNode>(Op)->getMemoryVT(),
-                           InChain, Op.getOperand(1), Op.getOperand(2),
-                           Op.getOperand(3),
-                           cast<AtomicSDNode>(Op)->getMemOperand(),
-                           Monotonic,
-                           cast<AtomicSDNode>(Op)->getSynchScope());
-  else
-    OutVal = DAG.getAtomic(Op.getOpcode(), dl,
-                           cast<AtomicSDNode>(Op)->getMemoryVT(),
-                           InChain, Op.getOperand(1), Op.getOperand(2),
-                           cast<AtomicSDNode>(Op)->getMemOperand(),
-                           Monotonic,
-                           cast<AtomicSDNode>(Op)->getSynchScope());
-
-  SDValue OutChain = OutVal.getValue(1);
-
-  // Fence, if necessary 
-  if (Order == Acquire || Order >= AcquireRelease)
-    OutChain = getFence(OutChain, dl, DAG, Subtarget);
-
-  SDValue Ops[2] = { OutVal, OutChain };
-  return DAG.getMergeValues(Ops, 2, dl);
-}
-
-
 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
                              const ARMSubtarget *Subtarget) {
   // ARM pre v5TE and Thumb1 does not have preload instructions.
@@ -4882,18 +4825,6 @@
   case ISD::VASTART:       return LowerVASTART(Op, DAG);
   case ISD::MEMBARRIER:    return LowerMEMBARRIER(Op, DAG, Subtarget);
   case ISD::ATOMIC_FENCE:  return LowerATOMIC_FENCE(Op, DAG, Subtarget);
-  case ISD::ATOMIC_CMP_SWAP:
-  case ISD::ATOMIC_SWAP:
-  case ISD::ATOMIC_LOAD_ADD:
-  case ISD::ATOMIC_LOAD_SUB:
-  case ISD::ATOMIC_LOAD_AND:
-  case ISD::ATOMIC_LOAD_OR:
-  case ISD::ATOMIC_LOAD_XOR:
-  case ISD::ATOMIC_LOAD_NAND:
-  case ISD::ATOMIC_LOAD_MIN:
-  case ISD::ATOMIC_LOAD_MAX:
-  case ISD::ATOMIC_LOAD_UMIN:
-  case ISD::ATOMIC_LOAD_UMAX: return LowerAtomicMemOp(Op, DAG, Subtarget);
   case ISD::PREFETCH:      return LowerPREFETCH(Op, DAG, Subtarget);
   case ISD::SINT_TO_FP:
   case ISD::UINT_TO_FP:    return LowerINT_TO_FP(Op, DAG);
diff --git a/llvm/lib/Target/Alpha/AlphaISelLowering.cpp b/llvm/lib/Target/Alpha/AlphaISelLowering.cpp
index de003fb..8fb63400 100644
--- a/llvm/lib/Target/Alpha/AlphaISelLowering.cpp
+++ b/llvm/lib/Target/Alpha/AlphaISelLowering.cpp
@@ -160,6 +160,8 @@
 
   setMinFunctionAlignment(4);
 
+  setInsertFencesForAtomic(true);
+
   computeRegisterProperties();
 }
 
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp
index 0dac789..54fa2d4 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -164,6 +164,8 @@
   setOperationAction(ISD::MEMBARRIER,        MVT::Other, Custom);
   setOperationAction(ISD::ATOMIC_FENCE,      MVT::Other, Custom);  
 
+  setInsertFencesForAtomic(true);
+
   if (Subtarget->isSingleFloat())
     setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
 
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index ff4f199..78a75f9 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -401,6 +401,8 @@
   if (PPCSubTarget.isDarwin())
     setPrefFunctionAlignment(4);
 
+  setInsertFencesForAtomic(true);
+
   computeRegisterProperties();
 }