[PPC] Lower load acquire/seq_cst trailing fence to cmp + bne + isync.

Summary:
This fixes pr32392.

The lowering pipeline is:
llvm.ppc.cfence in IR -> PPC::CFENCE8 in isel -> Actual instructions in
expandPostRAPseudo.

The reason why expandPostRAPseudo is chosen is because previous passes
are likely eliminating instructions like cmpw 3, 3 (early CSE) and bne-
7, .+4 (some branch pass(s)).

Differential Revision: https://reviews.llvm.org/D32763

llvm-svn: 303205
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 790a890..3afcec1 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1873,6 +1873,8 @@
 }
 
 bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
+  auto &MBB = *MI.getParent();
+  auto DL = MI.getDebugLoc();
   switch (MI.getOpcode()) {
   case TargetOpcode::LOAD_STACK_GUARD: {
     assert(Subtarget.isTargetLinux() &&
@@ -1920,6 +1922,17 @@
     MI.setDesc(get(Opcode));
     return true;
   }
+  case PPC::CFENCE8: {
+    auto Val = MI.getOperand(0).getReg();
+    BuildMI(MBB, MI, DL, get(PPC::CMPW), PPC::CR7).addReg(Val).addReg(Val);
+    BuildMI(MBB, MI, DL, get(PPC::CTRL_DEP))
+        .addImm(PPC::PRED_NE_MINUS)
+        .addReg(PPC::CR7)
+        .addImm(1);
+    MI.setDesc(get(PPC::ISYNC));
+    MI.RemoveOperand(0);
+    return true;
+  }
   }
   return false;
 }