[Hexagon] Eliminate pseudo instructions for circ/brev loads and stores

We can generate the actual instructions from the intrinsics without the
need for pseudo-instructions. Also, since the intrinsics have a side
effect in the form of a store, attempt to optimize away loads from the
store location.

llvm-svn: 260690
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index a0da945..b0e04cf 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -48,7 +48,7 @@
 ///
 namespace {
 class HexagonDAGToDAGISel : public SelectionDAGISel {
-  const HexagonTargetMachine& HTM;
+  const HexagonTargetMachine &HTM;
   const HexagonSubtarget *HST;
   const HexagonInstrInfo *HII;
   const HexagonRegisterInfo *HRI;
@@ -84,12 +84,21 @@
     return "Hexagon DAG->DAG Pattern Instruction Selection";
   }
 
+  // Generate a machine instruction node corresponding to the circ/brev
+  // load intrinsic.
+  MachineSDNode *LoadInstrForLoadIntrinsic(SDNode *IntN);
+  // Given the circ/brev load intrinsic and the already generated machine
+  // instruction, generate the appropriate store (that is a part of the
+  // intrinsic's functionality).
+  SDNode *StoreInstrForLoadIntrinsic(MachineSDNode *LoadN, SDNode *IntN);
+
   SDNode *SelectFrameIndex(SDNode *N);
   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
   /// inline asm expressions.
   bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                     unsigned ConstraintID,
                                     std::vector<SDValue> &OutOps) override;
+  SDNode *SelectLoadOfLoadIntrinsic(LoadSDNode *N);
   SDNode *SelectLoad(SDNode *N);
   SDNode *SelectBaseOffsetLoad(LoadSDNode *LD, SDLoc dl);
   SDNode *SelectIndexedLoad(LoadSDNode *LD, SDLoc dl);
@@ -485,20 +494,173 @@
 }
 
 
+MachineSDNode *HexagonDAGToDAGISel::LoadInstrForLoadIntrinsic(SDNode *IntN) {
+  if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN)
+    return nullptr;
+
+  SDLoc dl(IntN);
+  unsigned IntNo = cast<ConstantSDNode>(IntN->getOperand(1))->getZExtValue();
+
+  static std::map<unsigned,unsigned> LoadPciMap = {
+    { Intrinsic::hexagon_circ_ldb,  Hexagon::L2_loadrb_pci  },
+    { Intrinsic::hexagon_circ_ldub, Hexagon::L2_loadrub_pci },
+    { Intrinsic::hexagon_circ_ldh,  Hexagon::L2_loadrh_pci  },
+    { Intrinsic::hexagon_circ_lduh, Hexagon::L2_loadruh_pci },
+    { Intrinsic::hexagon_circ_ldw,  Hexagon::L2_loadri_pci  },
+    { Intrinsic::hexagon_circ_ldd,  Hexagon::L2_loadrd_pci  },
+  };
+  auto FLC = LoadPciMap.find(IntNo);
+  if (FLC != LoadPciMap.end()) {
+    SDNode *Mod = CurDAG->getMachineNode(Hexagon::A2_tfrrcr, dl, MVT::i32,
+          IntN->getOperand(4));
+    EVT ValTy = (IntNo == Intrinsic::hexagon_circ_ldd) ? MVT::i64 : MVT::i32;
+    EVT RTys[] = { ValTy, MVT::i32, MVT::Other };
+    // Operands: { Base, Increment, Modifier, Chain }
+    auto Inc = cast<ConstantSDNode>(IntN->getOperand(5));
+    SDValue I = CurDAG->getTargetConstant(Inc->getSExtValue(), dl, MVT::i32);
+    MachineSDNode *Res = CurDAG->getMachineNode(FLC->second, dl, RTys,
+          { IntN->getOperand(2), I, SDValue(Mod,0), IntN->getOperand(0) });
+    return Res;
+  }
+
+  static std::map<unsigned,unsigned> LoadPbrMap = {
+    { Intrinsic::hexagon_brev_ldb,  Hexagon::L2_loadrb_pbr  },
+    { Intrinsic::hexagon_brev_ldub, Hexagon::L2_loadrub_pbr },
+    { Intrinsic::hexagon_brev_ldh,  Hexagon::L2_loadrh_pbr  },
+    { Intrinsic::hexagon_brev_lduh, Hexagon::L2_loadruh_pbr },
+    { Intrinsic::hexagon_brev_ldw,  Hexagon::L2_loadri_pbr  },
+    { Intrinsic::hexagon_brev_ldd,  Hexagon::L2_loadrd_pbr  },
+  };
+  auto FLB = LoadPbrMap.find(IntNo);
+  if (FLB != LoadPbrMap.end()) {
+    SDNode *Mod = CurDAG->getMachineNode(Hexagon::A2_tfrrcr, dl, MVT::i32,
+            IntN->getOperand(4));
+    EVT ValTy = (IntNo == Intrinsic::hexagon_brev_ldd) ? MVT::i64 : MVT::i32;
+    EVT RTys[] = { ValTy, MVT::i32, MVT::Other };
+    // Operands: { Base, Modifier, Chain }
+    MachineSDNode *Res = CurDAG->getMachineNode(FLB->second, dl, RTys,
+          { IntN->getOperand(2), SDValue(Mod,0), IntN->getOperand(0) });
+    return Res;
+  }
+
+  return nullptr;
+}
+
+SDNode *HexagonDAGToDAGISel::StoreInstrForLoadIntrinsic(MachineSDNode *LoadN,
+      SDNode *IntN) {
+  // The "LoadN" is just a machine load instruction. The intrinsic also
+  // involves storing it. Generate an appropriate store to the location
+  // given in the intrinsic's operand(3).
+  uint64_t F = HII->get(LoadN->getMachineOpcode()).TSFlags;
+  unsigned SizeBits = (F >> HexagonII::MemAccessSizePos) &
+                      HexagonII::MemAccesSizeMask;
+  unsigned Size = 1U << (SizeBits-1);
+
+  SDLoc dl(IntN);
+  MachinePointerInfo PI;
+  SDValue TS;
+  SDValue Loc = IntN->getOperand(3);
+
+  if (Size >= 4)
+    TS = CurDAG->getStore(SDValue(LoadN,2), dl, SDValue(LoadN, 0), Loc, PI,
+                          false, false, Size);
+  else
+    TS = CurDAG->getTruncStore(SDValue(LoadN,2), dl, SDValue(LoadN,0), Loc, PI,
+                               MVT::getIntegerVT(Size*8), false, false, Size);
+  SDNode *StoreN = SelectStore(TS.getNode());
+
+  // Load's results are { Loaded value, Updated pointer, Chain }
+  ReplaceUses(SDValue(IntN, 0), SDValue(LoadN, 1));
+  ReplaceUses(SDValue(IntN, 1), SDValue(StoreN, 0));
+  return StoreN;
+}
+
+SDNode *HexagonDAGToDAGISel::SelectLoadOfLoadIntrinsic(LoadSDNode *N) {
+  // The intrinsics for load circ/brev perform two operations:
+  // 1. Load a value V from the specified location, using the addressing
+  //    mode corresponding to the intrinsic.
+  // 2. Store V into a specified location. This location is typically a
+  //    local, temporary object.
+  // In many cases, the program using these intrinsics will immediately
+  // load V again from the local object. In those cases, when certain
+  // conditions are met, the last load can be removed.
+  // This function identifies and optimizes this pattern. If the pattern
+  // cannot be optimized, it returns nullptr, which will cause the load
+  // to be selected separately from the intrinsic (which will be handled
+  // in SelectIntrinsicWChain).
+
+  SDValue Ch = N->getOperand(0);
+  SDValue Loc = N->getOperand(1);
+
+  // Assume that the load and the intrinsic are connected directly with a
+  // chain:
+  //   t1: i32,ch = int.load ..., ..., ..., Loc, ...    // <-- C
+  //   t2: i32,ch = load t1:1, Loc, ...
+  SDNode *C = Ch.getNode();
+
+  if (C->getOpcode() != ISD::INTRINSIC_W_CHAIN)
+    return nullptr;
+
+  // The second load can only be eliminated if its extension type matches
+  // that of the load instruction corresponding to the intrinsic. The user
+  // can provide an address of an unsigned variable to store the result of
+  // a sign-extending intrinsic into (or the other way around).
+  ISD::LoadExtType IntExt;
+  switch (cast<ConstantSDNode>(C->getOperand(1))->getZExtValue()) {
+    case Intrinsic::hexagon_brev_ldub:
+    case Intrinsic::hexagon_brev_lduh:
+    case Intrinsic::hexagon_circ_ldub:
+    case Intrinsic::hexagon_circ_lduh:
+      IntExt = ISD::ZEXTLOAD;
+      break;
+    case Intrinsic::hexagon_brev_ldw:
+    case Intrinsic::hexagon_brev_ldd:
+    case Intrinsic::hexagon_circ_ldw:
+    case Intrinsic::hexagon_circ_ldd:
+      IntExt = ISD::NON_EXTLOAD;
+      break;
+    default:
+      IntExt = ISD::SEXTLOAD;
+      break;
+  }
+  if (N->getExtensionType() != IntExt)
+    return nullptr;
+
+  // Make sure the target location for the loaded value in the load intrinsic
+  // is the same location that the load N is reading from.
+  if (C->getNumOperands() < 4 || Loc.getNode() != C->getOperand(3).getNode())
+    return nullptr;
+
+  if (MachineSDNode *L = LoadInstrForLoadIntrinsic(C)) {
+    SDNode *S = StoreInstrForLoadIntrinsic(L, C);
+    SDValue F[] = { SDValue(N,0), SDValue(N,1), SDValue(C,0), SDValue(C,1) };
+    SDValue T[] = { SDValue(L,0), SDValue(S,0), SDValue(L,1), SDValue(S,0) };
+    ReplaceUses(F, T, array_lengthof(T));
+    // This transformation will leave the intrinsic dead. If it remains in
+    // the DAG, the selection code will see it again, but without the load,
+    // and it will generate a store that is normally required for it.
+    CurDAG->RemoveDeadNodes();
+    return L;
+  }
+
+  return nullptr;
+}
+
+
 SDNode *HexagonDAGToDAGISel::SelectLoad(SDNode *N) {
-  SDNode *result;
   SDLoc dl(N);
   LoadSDNode *LD = cast<LoadSDNode>(N);
   ISD::MemIndexedMode AM = LD->getAddressingMode();
 
   // Handle indexed loads.
-  if (AM != ISD::UNINDEXED) {
-    result = SelectIndexedLoad(LD, dl);
-  } else {
-    result = SelectCode(LD);
-  }
+  if (AM != ISD::UNINDEXED)
+    return SelectIndexedLoad(LD, dl);
 
-  return result;
+  // Handle patterns using circ/brev load intrinsics.
+  if (SDNode *LI = SelectLoadOfLoadIntrinsic(LD))
+    return LI;
+
+  return SelectCode(LD);
 }
 
 
@@ -833,207 +995,16 @@
   return SelectCode(N);
 }
 
+
 //
-// Checking for intrinsics circular load/store, and bitreverse load/store
-// instrisics in order to select the correct lowered operation.
+// Handling intrinsics for circular load and bitreverse load.
 //
 SDNode *HexagonDAGToDAGISel::SelectIntrinsicWChain(SDNode *N) {
-  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
-  if (IntNo == Intrinsic::hexagon_circ_ldd  ||
-      IntNo == Intrinsic::hexagon_circ_ldw  ||
-      IntNo == Intrinsic::hexagon_circ_lduh ||
-      IntNo == Intrinsic::hexagon_circ_ldh  ||
-      IntNo == Intrinsic::hexagon_circ_ldub ||
-      IntNo == Intrinsic::hexagon_circ_ldb) {
-    SDLoc dl(N);
-    SDValue Chain = N->getOperand(0);
-    SDValue Base = N->getOperand(2);
-    SDValue Load = N->getOperand(3);
-    SDValue ModifierExpr = N->getOperand(4);
-    SDValue Offset = N->getOperand(5);
-
-    // We need to add the rerurn type for the load.  This intrinsic has
-    // two return types, one for the load and one for the post-increment.
-    // Only the *_ld instructions push the extra return type, and bump the
-    // result node operand number correspondingly.
-    std::vector<EVT> ResTys;
-    unsigned opc;
-    unsigned memsize, align;
-    MVT MvtSize = MVT::i32;
-
-    if (IntNo == Intrinsic::hexagon_circ_ldd) {
-      ResTys.push_back(MVT::i32);
-      ResTys.push_back(MVT::i64);
-      opc = Hexagon::L2_loadrd_pci_pseudo;
-      memsize = 8;
-      align = 8;
-    } else if (IntNo == Intrinsic::hexagon_circ_ldw) {
-      ResTys.push_back(MVT::i32);
-      ResTys.push_back(MVT::i32);
-      opc = Hexagon::L2_loadri_pci_pseudo;
-      memsize = 4;
-      align = 4;
-    } else if (IntNo == Intrinsic::hexagon_circ_ldh) {
-      ResTys.push_back(MVT::i32);
-      ResTys.push_back(MVT::i32);
-      opc = Hexagon::L2_loadrh_pci_pseudo;
-      memsize = 2;
-      align = 2;
-      MvtSize = MVT::i16;
-    } else if (IntNo == Intrinsic::hexagon_circ_lduh) {
-      ResTys.push_back(MVT::i32);
-      ResTys.push_back(MVT::i32);
-      opc = Hexagon::L2_loadruh_pci_pseudo;
-      memsize = 2;
-      align = 2;
-      MvtSize = MVT::i16;
-    } else if (IntNo == Intrinsic::hexagon_circ_ldb) {
-      ResTys.push_back(MVT::i32);
-      ResTys.push_back(MVT::i32);
-      opc = Hexagon::L2_loadrb_pci_pseudo;
-      memsize = 1;
-      align = 1;
-      MvtSize = MVT::i8;
-    } else if (IntNo == Intrinsic::hexagon_circ_ldub) {
-      ResTys.push_back(MVT::i32);
-      ResTys.push_back(MVT::i32);
-      opc = Hexagon::L2_loadrub_pci_pseudo;
-      memsize = 1;
-      align = 1;
-      MvtSize = MVT::i8;
-    } else
-      llvm_unreachable("no opc");
-
-    ResTys.push_back(MVT::Other);
-
-    // Copy over the arguments, which are the same mostly.
-    SmallVector<SDValue, 5> Ops;
-    Ops.push_back(Base);
-    Ops.push_back(Load);
-    Ops.push_back(ModifierExpr);
-    int32_t Val = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
-    Ops.push_back(CurDAG->getTargetConstant(Val, dl, MVT::i32));
-    Ops.push_back(Chain);
-    SDNode* Result = CurDAG->getMachineNode(opc, dl, ResTys, Ops);
-
-    SDValue ST;
-    MachineMemOperand *Mem =
-      MF->getMachineMemOperand(MachinePointerInfo(),
-                               MachineMemOperand::MOStore, memsize, align);
-    if (MvtSize != MVT::i32)
-      ST = CurDAG->getTruncStore(Chain, dl, SDValue(Result, 1), Load,
-                                 MvtSize, Mem);
-    else
-      ST = CurDAG->getStore(Chain, dl, SDValue(Result, 1), Load, Mem);
-
-    SDNode* Store = SelectStore(ST.getNode());
-
-    const SDValue Froms[] = { SDValue(N, 0),
-                              SDValue(N, 1) };
-    const SDValue Tos[]   = { SDValue(Result, 0),
-                              SDValue(Store, 0) };
-    ReplaceUses(Froms, Tos, 2);
-    return Result;
-  }
-
-  if (IntNo == Intrinsic::hexagon_brev_ldd  ||
-      IntNo == Intrinsic::hexagon_brev_ldw  ||
-      IntNo == Intrinsic::hexagon_brev_ldh  ||
-      IntNo == Intrinsic::hexagon_brev_lduh ||
-      IntNo == Intrinsic::hexagon_brev_ldb  ||
-      IntNo == Intrinsic::hexagon_brev_ldub) {
-    SDLoc dl(N);
-    SDValue Chain = N->getOperand(0);
-    SDValue Base = N->getOperand(2);
-    SDValue Load = N->getOperand(3);
-    SDValue ModifierExpr = N->getOperand(4);
-
-    // We need to add the rerurn type for the load.  This intrinsic has
-    // two return types, one for the load and one for the post-increment.
-    std::vector<EVT> ResTys;
-    unsigned opc;
-    unsigned memsize, align;
-    MVT MvtSize = MVT::i32;
-
-    if (IntNo == Intrinsic::hexagon_brev_ldd) {
-      ResTys.push_back(MVT::i32);
-      ResTys.push_back(MVT::i64);
-      opc = Hexagon::L2_loadrd_pbr_pseudo;
-      memsize = 8;
-      align = 8;
-    } else if (IntNo == Intrinsic::hexagon_brev_ldw) {
-      ResTys.push_back(MVT::i32);
-      ResTys.push_back(MVT::i32);
-      opc = Hexagon::L2_loadri_pbr_pseudo;
-      memsize = 4;
-      align = 4;
-    } else if (IntNo == Intrinsic::hexagon_brev_ldh) {
-      ResTys.push_back(MVT::i32);
-      ResTys.push_back(MVT::i32);
-      opc = Hexagon::L2_loadrh_pbr_pseudo;
-      memsize = 2;
-      align = 2;
-      MvtSize = MVT::i16;
-    } else if (IntNo == Intrinsic::hexagon_brev_lduh) {
-      ResTys.push_back(MVT::i32);
-      ResTys.push_back(MVT::i32);
-      opc = Hexagon::L2_loadruh_pbr_pseudo;
-      memsize = 2;
-      align = 2;
-      MvtSize = MVT::i16;
-    } else if (IntNo == Intrinsic::hexagon_brev_ldb) {
-      ResTys.push_back(MVT::i32);
-      ResTys.push_back(MVT::i32);
-      opc = Hexagon::L2_loadrb_pbr_pseudo;
-      memsize = 1;
-      align = 1;
-      MvtSize = MVT::i8;
-    } else if (IntNo == Intrinsic::hexagon_brev_ldub) {
-      ResTys.push_back(MVT::i32);
-      ResTys.push_back(MVT::i32);
-      opc = Hexagon::L2_loadrub_pbr_pseudo;
-      memsize = 1;
-      align = 1;
-      MvtSize = MVT::i8;
-    } else
-      llvm_unreachable("no opc");
-
-    ResTys.push_back(MVT::Other);
-
-    // Copy over the arguments, which are the same mostly.
-    SmallVector<SDValue, 4> Ops;
-    Ops.push_back(Base);
-    Ops.push_back(Load);
-    Ops.push_back(ModifierExpr);
-    Ops.push_back(Chain);
-    SDNode* Result = CurDAG->getMachineNode(opc, dl, ResTys, Ops);
-    SDValue ST;
-    MachineMemOperand *Mem =
-      MF->getMachineMemOperand(MachinePointerInfo(),
-                               MachineMemOperand::MOStore, memsize, align);
-    if (MvtSize != MVT::i32)
-      ST = CurDAG->getTruncStore(Chain, dl, SDValue(Result, 1), Load,
-                                 MvtSize, Mem);
-    else
-      ST = CurDAG->getStore(Chain, dl, SDValue(Result, 1), Load, Mem);
-
-    SDNode* Store = SelectStore(ST.getNode());
-
-    const SDValue Froms[] = { SDValue(N, 0),
-                              SDValue(N, 1) };
-    const SDValue Tos[]   = { SDValue(Result, 0),
-                              SDValue(Store, 0) };
-    ReplaceUses(Froms, Tos, 2);
-    return Result;
-  }
-
+  if (MachineSDNode *L = LoadInstrForLoadIntrinsic(N))
+    return StoreInstrForLoadIntrinsic(L, N);
   return SelectCode(N);
 }
 
-//
-// Checking for intrinsics which have predicate registers as operand(s)
-// and lowering to the actual intrinsic.
-//
 SDNode *HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) {
   unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
   unsigned Bits;
@@ -1392,7 +1363,7 @@
     auto IsSelect0 = [IsZero] (const SDValue &Op) -> bool {
       if (Op.getOpcode() != ISD::SELECT)
         return false;
-      return IsZero(Op.getOperand(1))  || IsZero(Op.getOperand(2));
+      return IsZero(Op.getOperand(1)) || IsZero(Op.getOperand(2));
     };
 
     SDValue N0 = I->getOperand(0), N1 = I->getOperand(1);