[Hexagon] Eliminate pseudo instructions for circ/brev loads and stores

We can generate the actual instructions from the intrinsics without the
need for pseudo-instructions. Also, since the intrinsics have a side
effect in the form of a store, attempt to optimize away subsequent loads
from the store location.

llvm-svn: 260690
diff --git a/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
index 6e2dbc0..3e24cd6 100644
--- a/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
@@ -80,166 +80,7 @@
          ++MII) {
       MachineInstr *MI = MII;
       int Opc = MI->getOpcode();
-      if (Opc == Hexagon::S2_storerb_pci_pseudo ||
-          Opc == Hexagon::S2_storerh_pci_pseudo ||
-          Opc == Hexagon::S2_storeri_pci_pseudo ||
-          Opc == Hexagon::S2_storerd_pci_pseudo ||
-          Opc == Hexagon::S2_storerf_pci_pseudo) {
-        unsigned Opcode;
-        if (Opc == Hexagon::S2_storerd_pci_pseudo)
-          Opcode = Hexagon::S2_storerd_pci;
-        else if (Opc == Hexagon::S2_storeri_pci_pseudo)
-          Opcode = Hexagon::S2_storeri_pci;
-        else if (Opc == Hexagon::S2_storerh_pci_pseudo)
-          Opcode = Hexagon::S2_storerh_pci;
-        else if (Opc == Hexagon::S2_storerf_pci_pseudo)
-          Opcode = Hexagon::S2_storerf_pci;
-        else if (Opc == Hexagon::S2_storerb_pci_pseudo)
-          Opcode = Hexagon::S2_storerb_pci;
-        else
-          llvm_unreachable("wrong Opc");
-        MachineOperand &Op0 = MI->getOperand(0);
-        MachineOperand &Op1 = MI->getOperand(1);
-        MachineOperand &Op2 = MI->getOperand(2);
-        MachineOperand &Op3 = MI->getOperand(3); // Modifier value.
-        MachineOperand &Op4 = MI->getOperand(4);
-        // Emit a "C6 = Rn, C6 is the control register for M0".
-        BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr),
-                Hexagon::C6)->addOperand(Op3);
-        // Replace the pseude circ_ldd by the real circ_ldd.
-        MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(),
-                                      TII->get(Opcode));
-        NewMI->addOperand(Op0);
-        NewMI->addOperand(Op1);
-        NewMI->addOperand(Op4);
-        NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0,
-                                                    false, /*isDef*/
-                                                    false, /*isImpl*/
-                                                    true   /*isKill*/));
-        NewMI->addOperand(Op2);
-        MII = MBB->erase(MI);
-        --MII;
-      } else if (Opc == Hexagon::L2_loadrd_pci_pseudo ||
-                 Opc == Hexagon::L2_loadri_pci_pseudo ||
-                 Opc == Hexagon::L2_loadrh_pci_pseudo ||
-                 Opc == Hexagon::L2_loadruh_pci_pseudo||
-                 Opc == Hexagon::L2_loadrb_pci_pseudo ||
-                 Opc == Hexagon::L2_loadrub_pci_pseudo) {
-        unsigned Opcode;
-        if (Opc == Hexagon::L2_loadrd_pci_pseudo)
-          Opcode = Hexagon::L2_loadrd_pci;
-        else if (Opc == Hexagon::L2_loadri_pci_pseudo)
-          Opcode = Hexagon::L2_loadri_pci;
-        else if (Opc == Hexagon::L2_loadrh_pci_pseudo)
-          Opcode = Hexagon::L2_loadrh_pci;
-        else if (Opc == Hexagon::L2_loadruh_pci_pseudo)
-          Opcode = Hexagon::L2_loadruh_pci;
-        else if (Opc == Hexagon::L2_loadrb_pci_pseudo)
-          Opcode = Hexagon::L2_loadrb_pci;
-        else if (Opc == Hexagon::L2_loadrub_pci_pseudo)
-          Opcode = Hexagon::L2_loadrub_pci;
-        else
-          llvm_unreachable("wrong Opc");
-
-        MachineOperand &Op0 = MI->getOperand(0);
-        MachineOperand &Op1 = MI->getOperand(1);
-        MachineOperand &Op2 = MI->getOperand(2);
-        MachineOperand &Op4 = MI->getOperand(4); // Modifier value.
-        MachineOperand &Op5 = MI->getOperand(5);
-        // Emit a "C6 = Rn, C6 is the control register for M0".
-        BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr),
-                Hexagon::C6)->addOperand(Op4);
-        // Replace the pseude circ_ldd by the real circ_ldd.
-        MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(),
-                                      TII->get(Opcode));
-        NewMI->addOperand(Op1);
-        NewMI->addOperand(Op0);
-        NewMI->addOperand(Op2);
-        NewMI->addOperand(Op5);
-        NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0,
-                                                    false, /*isDef*/
-                                                    false, /*isImpl*/
-                                                    true   /*isKill*/));
-        MII = MBB->erase(MI);
-        --MII;
-      } else if (Opc == Hexagon::L2_loadrd_pbr_pseudo ||
-                 Opc == Hexagon::L2_loadri_pbr_pseudo ||
-                 Opc == Hexagon::L2_loadrh_pbr_pseudo ||
-                 Opc == Hexagon::L2_loadruh_pbr_pseudo||
-                 Opc == Hexagon::L2_loadrb_pbr_pseudo ||
-                 Opc == Hexagon::L2_loadrub_pbr_pseudo) {
-        unsigned Opcode;
-        if (Opc == Hexagon::L2_loadrd_pbr_pseudo)
-          Opcode = Hexagon::L2_loadrd_pbr;
-        else if (Opc == Hexagon::L2_loadri_pbr_pseudo)
-          Opcode = Hexagon::L2_loadri_pbr;
-        else if (Opc == Hexagon::L2_loadrh_pbr_pseudo)
-          Opcode = Hexagon::L2_loadrh_pbr;
-        else if (Opc == Hexagon::L2_loadruh_pbr_pseudo)
-          Opcode = Hexagon::L2_loadruh_pbr;
-        else if (Opc == Hexagon::L2_loadrb_pbr_pseudo)
-          Opcode = Hexagon::L2_loadrb_pbr;
-        else if (Opc == Hexagon::L2_loadrub_pbr_pseudo)
-          Opcode = Hexagon::L2_loadrub_pbr;
-        else
-          llvm_unreachable("wrong Opc");
-        MachineOperand &Op0 = MI->getOperand(0);
-        MachineOperand &Op1 = MI->getOperand(1);
-        MachineOperand &Op2 = MI->getOperand(2);
-        MachineOperand &Op4 = MI->getOperand(4); // Modifier value.
-        // Emit a "C6 = Rn, C6 is the control register for M0".
-        BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr),
-                Hexagon::C6)->addOperand(Op4);
-        // Replace the pseudo brev_ldd by the real brev_ldd.
-        MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(),
-                                      TII->get(Opcode));
-        NewMI->addOperand(Op1);
-        NewMI->addOperand(Op0);
-        NewMI->addOperand(Op2);
-        NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0,
-                                                    false, /*isDef*/
-                                                    false, /*isImpl*/
-                                                    true   /*isKill*/));
-        MII = MBB->erase(MI);
-        --MII;
-      } else if (Opc == Hexagon::S2_storerd_pbr_pseudo ||
-                 Opc == Hexagon::S2_storeri_pbr_pseudo ||
-                 Opc == Hexagon::S2_storerh_pbr_pseudo ||
-                 Opc == Hexagon::S2_storerb_pbr_pseudo ||
-                 Opc == Hexagon::S2_storerf_pbr_pseudo) {
-        unsigned Opcode;
-        if (Opc == Hexagon::S2_storerd_pbr_pseudo)
-          Opcode = Hexagon::S2_storerd_pbr;
-        else if (Opc == Hexagon::S2_storeri_pbr_pseudo)
-          Opcode = Hexagon::S2_storeri_pbr;
-        else if (Opc == Hexagon::S2_storerh_pbr_pseudo)
-          Opcode = Hexagon::S2_storerh_pbr;
-        else if (Opc == Hexagon::S2_storerf_pbr_pseudo)
-          Opcode = Hexagon::S2_storerf_pbr;
-        else if (Opc == Hexagon::S2_storerb_pbr_pseudo)
-          Opcode = Hexagon::S2_storerb_pbr;
-        else
-          llvm_unreachable("wrong Opc");
-        MachineOperand &Op0 = MI->getOperand(0);
-        MachineOperand &Op1 = MI->getOperand(1);
-        MachineOperand &Op2 = MI->getOperand(2);
-        MachineOperand &Op3 = MI->getOperand(3); // Modifier value.
-        // Emit a "C6 = Rn, C6 is the control register for M0".
-        BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr),
-                Hexagon::C6)->addOperand(Op3);
-        // Replace the pseudo brev_ldd by the real brev_ldd.
-        MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(),
-                                      TII->get(Opcode));
-        NewMI->addOperand(Op0);
-        NewMI->addOperand(Op1);
-        NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0,
-                                                    false, /*isDef*/
-                                                    false, /*isImpl*/
-                                                    true   /*isKill*/));
-        NewMI->addOperand(Op2);
-        MII = MBB->erase(MI);
-        --MII;
-      } else if (Opc == Hexagon::STriw_pred) {
+      if (Opc == Hexagon::STriw_pred) {
         // STriw_pred [R30], ofst, SrcReg;
         unsigned FP = MI->getOperand(0).getReg();
         assert(FP == QST.getRegisterInfo()->getFrameRegister() &&
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index a0da945..b0e04cf 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -48,7 +48,7 @@
 ///
 namespace {
 class HexagonDAGToDAGISel : public SelectionDAGISel {
-  const HexagonTargetMachine& HTM;
+  const HexagonTargetMachine &HTM;
   const HexagonSubtarget *HST;
   const HexagonInstrInfo *HII;
   const HexagonRegisterInfo *HRI;
@@ -84,12 +84,21 @@
     return "Hexagon DAG->DAG Pattern Instruction Selection";
   }
 
+  // Generate a machine instruction node corresponding to the circ/brev
+  // load intrinsic.
+  MachineSDNode *LoadInstrForLoadIntrinsic(SDNode *IntN);
+  // Given the circ/brev load intrinsic and the already generated machine
+  // instruction, generate the appropriate store (that is a part of the
+  // intrinsic's functionality).
+  SDNode *StoreInstrForLoadIntrinsic(MachineSDNode *LoadN, SDNode *IntN);
+
   SDNode *SelectFrameIndex(SDNode *N);
   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
   /// inline asm expressions.
   bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                     unsigned ConstraintID,
                                     std::vector<SDValue> &OutOps) override;
+  SDNode *SelectLoadOfLoadIntrinsic(LoadSDNode *N);
   SDNode *SelectLoad(SDNode *N);
   SDNode *SelectBaseOffsetLoad(LoadSDNode *LD, SDLoc dl);
   SDNode *SelectIndexedLoad(LoadSDNode *LD, SDLoc dl);
@@ -485,20 +494,173 @@
 }
 
 
+MachineSDNode *HexagonDAGToDAGISel::LoadInstrForLoadIntrinsic(SDNode *IntN) {
+  if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN)
+    return nullptr;
+
+  SDLoc dl(IntN);
+  unsigned IntNo = cast<ConstantSDNode>(IntN->getOperand(1))->getZExtValue();
+
+  static std::map<unsigned,unsigned> LoadPciMap = {
+    { Intrinsic::hexagon_circ_ldb,  Hexagon::L2_loadrb_pci  },
+    { Intrinsic::hexagon_circ_ldub, Hexagon::L2_loadrub_pci },
+    { Intrinsic::hexagon_circ_ldh,  Hexagon::L2_loadrh_pci  },
+    { Intrinsic::hexagon_circ_lduh, Hexagon::L2_loadruh_pci },
+    { Intrinsic::hexagon_circ_ldw,  Hexagon::L2_loadri_pci  },
+    { Intrinsic::hexagon_circ_ldd,  Hexagon::L2_loadrd_pci  },
+  };
+  auto FLC = LoadPciMap.find(IntNo);
+  if (FLC != LoadPciMap.end()) {
+    SDNode *Mod = CurDAG->getMachineNode(Hexagon::A2_tfrrcr, dl, MVT::i32,
+          IntN->getOperand(4));  // Modifier value.
+    EVT ValTy = (IntNo == Intrinsic::hexagon_circ_ldd) ? MVT::i64 : MVT::i32;
+    EVT RTys[] = { ValTy, MVT::i32, MVT::Other };
+    // Operands: { Base, Increment (a target constant), Modifier, Chain }
+    auto Inc = cast<ConstantSDNode>(IntN->getOperand(5));
+    SDValue I = CurDAG->getTargetConstant(Inc->getSExtValue(), dl, MVT::i32);
+    MachineSDNode *Res = CurDAG->getMachineNode(FLC->second, dl, RTys,
+          { IntN->getOperand(2), I, SDValue(Mod,0), IntN->getOperand(0) });
+    return Res;
+  }
+
+  static std::map<unsigned,unsigned> LoadPbrMap = {
+    { Intrinsic::hexagon_brev_ldb,  Hexagon::L2_loadrb_pbr  },
+    { Intrinsic::hexagon_brev_ldub, Hexagon::L2_loadrub_pbr },
+    { Intrinsic::hexagon_brev_ldh,  Hexagon::L2_loadrh_pbr  },
+    { Intrinsic::hexagon_brev_lduh, Hexagon::L2_loadruh_pbr },
+    { Intrinsic::hexagon_brev_ldw,  Hexagon::L2_loadri_pbr  },
+    { Intrinsic::hexagon_brev_ldd,  Hexagon::L2_loadrd_pbr  },
+  };
+  auto FLB = LoadPbrMap.find(IntNo);
+  if (FLB != LoadPbrMap.end()) {
+    SDNode *Mod = CurDAG->getMachineNode(Hexagon::A2_tfrrcr, dl, MVT::i32,
+            IntN->getOperand(4));  // Modifier value.
+    EVT ValTy = (IntNo == Intrinsic::hexagon_brev_ldd) ? MVT::i64 : MVT::i32;
+    EVT RTys[] = { ValTy, MVT::i32, MVT::Other };
+    // Operands: { Base, Modifier, Chain }; brev loads take no increment.
+    MachineSDNode *Res = CurDAG->getMachineNode(FLB->second, dl, RTys,
+          { IntN->getOperand(2), SDValue(Mod,0), IntN->getOperand(0) });
+    return Res;
+  }
+
+  return nullptr;  // Not a circ/brev load intrinsic.
+}
+
+SDNode *HexagonDAGToDAGISel::StoreInstrForLoadIntrinsic(MachineSDNode *LoadN,
+      SDNode *IntN) {
+  // LoadN is only the machine load instruction. The intrinsic also stores
+  // the loaded value, so generate the matching store to the location given
+  // in the intrinsic's operand(3).
+  uint64_t F = HII->get(LoadN->getMachineOpcode()).TSFlags;
+  unsigned SizeBits = (F >> HexagonII::MemAccessSizePos) &
+                      HexagonII::MemAccesSizeMask;
+  unsigned Size = 1U << (SizeBits-1);  // Access size in bytes.
+
+  SDLoc dl(IntN);
+  MachinePointerInfo PI;
+  SDValue TS;
+  SDValue Loc = IntN->getOperand(3);
+
+  if (Size >= 4)
+    TS = CurDAG->getStore(SDValue(LoadN,2), dl, SDValue(LoadN, 0), Loc, PI,
+                          false, false, Size);
+  else  // Narrower than 4 bytes: use a truncating store.
+    TS = CurDAG->getTruncStore(SDValue(LoadN,2), dl, SDValue(LoadN,0), Loc, PI,
+                               MVT::getIntegerVT(Size*8), false, false, Size);
+  SDNode *StoreN = SelectStore(TS.getNode());
+
+  // LoadN's results are { Loaded value, Updated pointer, Chain }.
+  ReplaceUses(SDValue(IntN, 0), SDValue(LoadN, 1));
+  ReplaceUses(SDValue(IntN, 1), SDValue(StoreN, 0));
+  return StoreN;
+}
+
+SDNode *HexagonDAGToDAGISel::SelectLoadOfLoadIntrinsic(LoadSDNode *N) {
+  // The intrinsics for load circ/brev perform two operations:
+  // 1. Load a value V from the specified location, using the addressing
+  //    mode corresponding to the intrinsic.
+  // 2. Store V into a specified location. This location is typically a
+  //    local, temporary object.
+  // In many cases, the program using these intrinsics will immediately
+  // load V again from the local object. In those cases, when certain
+  // conditions are met, the last load can be removed.
+  // This function identifies and optimizes this pattern. If the pattern
+  // cannot be optimized, it returns nullptr, which will cause the load
+  // to be selected separately from the intrinsic (which will be handled
+  // in SelectIntrinsicWChain).
+
+  SDValue Ch = N->getOperand(0);
+  SDValue Loc = N->getOperand(1);
+
+  // Assume that the load and the intrinsic are connected directly with a
+  // chain:
+  //   t1: i32,ch = int.load ..., ..., ..., Loc, ...    // <-- C
+  //   t2: i32,ch = load t1:1, Loc, ...
+  SDNode *C = Ch.getNode();
+
+  if (C->getOpcode() != ISD::INTRINSIC_W_CHAIN)
+    return nullptr;
+
+  // The second load can only be eliminated if its extension type matches
+  // that of the load instruction corresponding to the intrinsic. The user
+  // can provide an address of an unsigned variable to store the result of
+  // a sign-extending intrinsic into (or the other way around).
+  ISD::LoadExtType IntExt;
+  switch (cast<ConstantSDNode>(C->getOperand(1))->getZExtValue()) {
+    case Intrinsic::hexagon_brev_ldub:
+    case Intrinsic::hexagon_brev_lduh:
+    case Intrinsic::hexagon_circ_ldub:
+    case Intrinsic::hexagon_circ_lduh:
+      IntExt = ISD::ZEXTLOAD;
+      break;
+    case Intrinsic::hexagon_brev_ldw:
+    case Intrinsic::hexagon_brev_ldd:
+    case Intrinsic::hexagon_circ_ldw:
+    case Intrinsic::hexagon_circ_ldd:
+      IntExt = ISD::NON_EXTLOAD;
+      break;
+    default:  // Remaining cases, including the ldb/ldh intrinsics.
+      IntExt = ISD::SEXTLOAD;
+      break;
+  }
+  if (N->getExtensionType() != IntExt)
+    return nullptr;
+
+  // Make sure the location the load intrinsic stores the loaded value to
+  // (its operand 3) is the same location that N is loading from.
+  if (C->getNumOperands() < 4 || Loc.getNode() != C->getOperand(3).getNode())
+    return nullptr;
+
+  if (MachineSDNode *L = LoadInstrForLoadIntrinsic(C)) {
+    SDNode *S = StoreInstrForLoadIntrinsic(L, C);
+    SDValue F[] = { SDValue(N,0), SDValue(N,1), SDValue(C,0), SDValue(C,1) };
+    SDValue T[] = { SDValue(L,0), SDValue(S,0), SDValue(L,1), SDValue(S,0) };
+    ReplaceUses(F, T, array_lengthof(T));
+    // This transformation leaves the intrinsic dead. If it stayed in the
+    // DAG, the selection code would visit it again, this time without the
+    // load, and would emit the store that the intrinsic normally requires.
+    CurDAG->RemoveDeadNodes();
+    return L;
+  }
+
+  return nullptr;
+}
+
+
 SDNode *HexagonDAGToDAGISel::SelectLoad(SDNode *N) {
-  SDNode *result;
   SDLoc dl(N);
   LoadSDNode *LD = cast<LoadSDNode>(N);
   ISD::MemIndexedMode AM = LD->getAddressingMode();
 
   // Handle indexed loads.
-  if (AM != ISD::UNINDEXED) {
-    result = SelectIndexedLoad(LD, dl);
-  } else {
-    result = SelectCode(LD);
-  }
+  if (AM != ISD::UNINDEXED)
+    return SelectIndexedLoad(LD, dl);
 
-  return result;
+  // Handle patterns using circ/brev load intrinsics.
+  if (SDNode *LI = SelectLoadOfLoadIntrinsic(LD))
+    return LI;
+
+  return SelectCode(LD);
 }
 
 
@@ -833,207 +995,16 @@
   return SelectCode(N);
 }
 
+
 //
-// Checking for intrinsics circular load/store, and bitreverse load/store
-// instrisics in order to select the correct lowered operation.
+// Handling intrinsics for circular load and bitreverse load.
 //
 SDNode *HexagonDAGToDAGISel::SelectIntrinsicWChain(SDNode *N) {
-  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
-  if (IntNo == Intrinsic::hexagon_circ_ldd  ||
-      IntNo == Intrinsic::hexagon_circ_ldw  ||
-      IntNo == Intrinsic::hexagon_circ_lduh ||
-      IntNo == Intrinsic::hexagon_circ_ldh  ||
-      IntNo == Intrinsic::hexagon_circ_ldub ||
-      IntNo == Intrinsic::hexagon_circ_ldb) {
-    SDLoc dl(N);
-    SDValue Chain = N->getOperand(0);
-    SDValue Base = N->getOperand(2);
-    SDValue Load = N->getOperand(3);
-    SDValue ModifierExpr = N->getOperand(4);
-    SDValue Offset = N->getOperand(5);
-
-    // We need to add the rerurn type for the load.  This intrinsic has
-    // two return types, one for the load and one for the post-increment.
-    // Only the *_ld instructions push the extra return type, and bump the
-    // result node operand number correspondingly.
-    std::vector<EVT> ResTys;
-    unsigned opc;
-    unsigned memsize, align;
-    MVT MvtSize = MVT::i32;
-
-    if (IntNo == Intrinsic::hexagon_circ_ldd) {
-      ResTys.push_back(MVT::i32);
-      ResTys.push_back(MVT::i64);
-      opc = Hexagon::L2_loadrd_pci_pseudo;
-      memsize = 8;
-      align = 8;
-    } else if (IntNo == Intrinsic::hexagon_circ_ldw) {
-      ResTys.push_back(MVT::i32);
-      ResTys.push_back(MVT::i32);
-      opc = Hexagon::L2_loadri_pci_pseudo;
-      memsize = 4;
-      align = 4;
-    } else if (IntNo == Intrinsic::hexagon_circ_ldh) {
-      ResTys.push_back(MVT::i32);
-      ResTys.push_back(MVT::i32);
-      opc = Hexagon::L2_loadrh_pci_pseudo;
-      memsize = 2;
-      align = 2;
-      MvtSize = MVT::i16;
-    } else if (IntNo == Intrinsic::hexagon_circ_lduh) {
-      ResTys.push_back(MVT::i32);
-      ResTys.push_back(MVT::i32);
-      opc = Hexagon::L2_loadruh_pci_pseudo;
-      memsize = 2;
-      align = 2;
-      MvtSize = MVT::i16;
-    } else if (IntNo == Intrinsic::hexagon_circ_ldb) {
-      ResTys.push_back(MVT::i32);
-      ResTys.push_back(MVT::i32);
-      opc = Hexagon::L2_loadrb_pci_pseudo;
-      memsize = 1;
-      align = 1;
-      MvtSize = MVT::i8;
-    } else if (IntNo == Intrinsic::hexagon_circ_ldub) {
-      ResTys.push_back(MVT::i32);
-      ResTys.push_back(MVT::i32);
-      opc = Hexagon::L2_loadrub_pci_pseudo;
-      memsize = 1;
-      align = 1;
-      MvtSize = MVT::i8;
-    } else
-      llvm_unreachable("no opc");
-
-    ResTys.push_back(MVT::Other);
-
-    // Copy over the arguments, which are the same mostly.
-    SmallVector<SDValue, 5> Ops;
-    Ops.push_back(Base);
-    Ops.push_back(Load);
-    Ops.push_back(ModifierExpr);
-    int32_t Val = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
-    Ops.push_back(CurDAG->getTargetConstant(Val, dl, MVT::i32));
-    Ops.push_back(Chain);
-    SDNode* Result = CurDAG->getMachineNode(opc, dl, ResTys, Ops);
-
-    SDValue ST;
-    MachineMemOperand *Mem =
-      MF->getMachineMemOperand(MachinePointerInfo(),
-                               MachineMemOperand::MOStore, memsize, align);
-    if (MvtSize != MVT::i32)
-      ST = CurDAG->getTruncStore(Chain, dl, SDValue(Result, 1), Load,
-                                 MvtSize, Mem);
-    else
-      ST = CurDAG->getStore(Chain, dl, SDValue(Result, 1), Load, Mem);
-
-    SDNode* Store = SelectStore(ST.getNode());
-
-    const SDValue Froms[] = { SDValue(N, 0),
-                              SDValue(N, 1) };
-    const SDValue Tos[]   = { SDValue(Result, 0),
-                              SDValue(Store, 0) };
-    ReplaceUses(Froms, Tos, 2);
-    return Result;
-  }
-
-  if (IntNo == Intrinsic::hexagon_brev_ldd  ||
-      IntNo == Intrinsic::hexagon_brev_ldw  ||
-      IntNo == Intrinsic::hexagon_brev_ldh  ||
-      IntNo == Intrinsic::hexagon_brev_lduh ||
-      IntNo == Intrinsic::hexagon_brev_ldb  ||
-      IntNo == Intrinsic::hexagon_brev_ldub) {
-    SDLoc dl(N);
-    SDValue Chain = N->getOperand(0);
-    SDValue Base = N->getOperand(2);
-    SDValue Load = N->getOperand(3);
-    SDValue ModifierExpr = N->getOperand(4);
-
-    // We need to add the rerurn type for the load.  This intrinsic has
-    // two return types, one for the load and one for the post-increment.
-    std::vector<EVT> ResTys;
-    unsigned opc;
-    unsigned memsize, align;
-    MVT MvtSize = MVT::i32;
-
-    if (IntNo == Intrinsic::hexagon_brev_ldd) {
-      ResTys.push_back(MVT::i32);
-      ResTys.push_back(MVT::i64);
-      opc = Hexagon::L2_loadrd_pbr_pseudo;
-      memsize = 8;
-      align = 8;
-    } else if (IntNo == Intrinsic::hexagon_brev_ldw) {
-      ResTys.push_back(MVT::i32);
-      ResTys.push_back(MVT::i32);
-      opc = Hexagon::L2_loadri_pbr_pseudo;
-      memsize = 4;
-      align = 4;
-    } else if (IntNo == Intrinsic::hexagon_brev_ldh) {
-      ResTys.push_back(MVT::i32);
-      ResTys.push_back(MVT::i32);
-      opc = Hexagon::L2_loadrh_pbr_pseudo;
-      memsize = 2;
-      align = 2;
-      MvtSize = MVT::i16;
-    } else if (IntNo == Intrinsic::hexagon_brev_lduh) {
-      ResTys.push_back(MVT::i32);
-      ResTys.push_back(MVT::i32);
-      opc = Hexagon::L2_loadruh_pbr_pseudo;
-      memsize = 2;
-      align = 2;
-      MvtSize = MVT::i16;
-    } else if (IntNo == Intrinsic::hexagon_brev_ldb) {
-      ResTys.push_back(MVT::i32);
-      ResTys.push_back(MVT::i32);
-      opc = Hexagon::L2_loadrb_pbr_pseudo;
-      memsize = 1;
-      align = 1;
-      MvtSize = MVT::i8;
-    } else if (IntNo == Intrinsic::hexagon_brev_ldub) {
-      ResTys.push_back(MVT::i32);
-      ResTys.push_back(MVT::i32);
-      opc = Hexagon::L2_loadrub_pbr_pseudo;
-      memsize = 1;
-      align = 1;
-      MvtSize = MVT::i8;
-    } else
-      llvm_unreachable("no opc");
-
-    ResTys.push_back(MVT::Other);
-
-    // Copy over the arguments, which are the same mostly.
-    SmallVector<SDValue, 4> Ops;
-    Ops.push_back(Base);
-    Ops.push_back(Load);
-    Ops.push_back(ModifierExpr);
-    Ops.push_back(Chain);
-    SDNode* Result = CurDAG->getMachineNode(opc, dl, ResTys, Ops);
-    SDValue ST;
-    MachineMemOperand *Mem =
-      MF->getMachineMemOperand(MachinePointerInfo(),
-                               MachineMemOperand::MOStore, memsize, align);
-    if (MvtSize != MVT::i32)
-      ST = CurDAG->getTruncStore(Chain, dl, SDValue(Result, 1), Load,
-                                 MvtSize, Mem);
-    else
-      ST = CurDAG->getStore(Chain, dl, SDValue(Result, 1), Load, Mem);
-
-    SDNode* Store = SelectStore(ST.getNode());
-
-    const SDValue Froms[] = { SDValue(N, 0),
-                              SDValue(N, 1) };
-    const SDValue Tos[]   = { SDValue(Result, 0),
-                              SDValue(Store, 0) };
-    ReplaceUses(Froms, Tos, 2);
-    return Result;
-  }
-
+  if (MachineSDNode *L = LoadInstrForLoadIntrinsic(N))
+    return StoreInstrForLoadIntrinsic(L, N);
   return SelectCode(N);
 }
 
-//
-// Checking for intrinsics which have predicate registers as operand(s)
-// and lowering to the actual intrinsic.
-//
 SDNode *HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) {
   unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
   unsigned Bits;
@@ -1392,7 +1363,7 @@
     auto IsSelect0 = [IsZero] (const SDValue &Op) -> bool {
       if (Op.getOpcode() != ISD::SELECT)
         return false;
-      return IsZero(Op.getOperand(1))  || IsZero(Op.getOperand(2));
+      return IsZero(Op.getOperand(1)) || IsZero(Op.getOperand(2));
     };
 
     SDValue N0 = I->getOperand(0), N1 = I->getOperand(1);
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.td b/llvm/lib/Target/Hexagon/HexagonInstrInfo.td
index e37f4a7..b209ff8 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.td
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.td
@@ -2161,28 +2161,6 @@
 let accessSize = DoubleWordAccess, hasNewValue = 0 in
 def L2_loadrd_pci : T_load_pci <"memd", DoubleRegs, s4_3Imm, 0b1110>;
 
-//===----------------------------------------------------------------------===//
-// Circular loads - Pseudo
-//
-// Please note that the input operand order in the pseudo instructions
-// doesn't match with the real instructions. Pseudo instructions operand
-// order should mimics the ordering in the intrinsics. Also, 'src2' doesn't
-// appear in the AsmString because it's same as 'dst'.
-//===----------------------------------------------------------------------===//
-let isCodeGenOnly = 1,  mayLoad = 1, hasSideEffects = 0, isPseudo = 1 in
-class T_load_pci_pseudo <string opc, RegisterClass RC>
-  : LDInstPI<(outs IntRegs:$_dst_, RC:$dst),
-             (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4Imm:$src4),
-  ".error \"$dst = "#opc#"($src1++#$src4:circ($src3))\"",
-  [], "$src1 = $_dst_">;
-
-def L2_loadrb_pci_pseudo  : T_load_pci_pseudo <"memb",  IntRegs>;
-def L2_loadrub_pci_pseudo : T_load_pci_pseudo <"memub", IntRegs>;
-def L2_loadrh_pci_pseudo  : T_load_pci_pseudo <"memh",  IntRegs>;
-def L2_loadruh_pci_pseudo : T_load_pci_pseudo <"memuh", IntRegs>;
-def L2_loadri_pci_pseudo  : T_load_pci_pseudo <"memw",  IntRegs>;
-def L2_loadrd_pci_pseudo  : T_load_pci_pseudo <"memd",  DoubleRegs>;
-
 
 // TODO: memb_fifo and memh_fifo must take destination register as input.
 // One-off circ loads - not enough in common to break into a class.
@@ -2284,26 +2262,6 @@
                                    HalfWordAccess, 0b0010>;
 
 //===----------------------------------------------------------------------===//
-// Bit-reversed loads - Pseudo
-//
-// Please note that 'src2' doesn't appear in the AsmString because
-// it's same as 'dst'.
-//===----------------------------------------------------------------------===//
-let isCodeGenOnly = 1, mayLoad = 1, hasSideEffects = 0, isPseudo = 1 in
-class T_load_pbr_pseudo <string opc, RegisterClass RC>
-  : LDInstPI<(outs IntRegs:$_dst_, RC:$dst),
-             (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
-  ".error \"$dst = "#opc#"($src1++$src3:brev)\"",
-  [], "$src1 = $_dst_">;
-
-def L2_loadrb_pbr_pseudo  : T_load_pbr_pseudo <"memb",  IntRegs>;
-def L2_loadrub_pbr_pseudo : T_load_pbr_pseudo <"memub", IntRegs>;
-def L2_loadrh_pbr_pseudo  : T_load_pbr_pseudo <"memh",  IntRegs>;
-def L2_loadruh_pbr_pseudo : T_load_pbr_pseudo <"memuh", IntRegs>;
-def L2_loadri_pbr_pseudo  : T_load_pbr_pseudo <"memw",  IntRegs>;
-def L2_loadrd_pbr_pseudo  : T_load_pbr_pseudo <"memd",  DoubleRegs>;
-
-//===----------------------------------------------------------------------===//
 // LD -
 //===----------------------------------------------------------------------===//
 
@@ -3757,26 +3715,6 @@
 def S2_storerinew_pci : T_storenew_pci <"memw", s4_2Imm, 0b10, WordAccess>;
 
 //===----------------------------------------------------------------------===//
-// Circular stores - Pseudo
-//
-// Please note that the input operand order in the pseudo instructions
-// doesn't match with the real instructions. Pseudo instructions operand
-// order should mimics the ordering in the intrinsics.
-//===----------------------------------------------------------------------===//
-let isCodeGenOnly = 1, mayStore = 1, hasSideEffects = 0, isPseudo = 1 in
-class T_store_pci_pseudo <string opc, RegisterClass RC>
-  : STInstPI<(outs IntRegs:$_dst_),
-             (ins IntRegs:$src1, RC:$src2, IntRegs:$src3, s4Imm:$src4),
-  ".error \""#opc#"($src1++#$src4:circ($src3)) = $src2\"",
-  [], "$_dst_ = $src1">;
-
-def S2_storerb_pci_pseudo : T_store_pci_pseudo <"memb", IntRegs>;
-def S2_storerh_pci_pseudo : T_store_pci_pseudo <"memh", IntRegs>;
-def S2_storerf_pci_pseudo : T_store_pci_pseudo <"memh", IntRegs>;
-def S2_storeri_pci_pseudo : T_store_pci_pseudo <"memw", IntRegs>;
-def S2_storerd_pci_pseudo : T_store_pci_pseudo <"memd", DoubleRegs>;
-
-//===----------------------------------------------------------------------===//
 // Circular stores with auto-increment register
 //===----------------------------------------------------------------------===//
 let Uses = [CS] in
@@ -3922,26 +3860,6 @@
 def S2_storerinew_pbr : T_storenew_pbr<"memw", WordAccess, 0b10>;
 
 //===----------------------------------------------------------------------===//
-// Bit-reversed stores - Pseudo
-//
-// Please note that the input operand order in the pseudo instructions
-// doesn't match with the real instructions. Pseudo instructions operand
-// order should mimics the ordering in the intrinsics.
-//===----------------------------------------------------------------------===//
-let isCodeGenOnly = 1,  mayStore = 1, hasSideEffects = 0, isPseudo = 1 in
-class T_store_pbr_pseudo <string opc, RegisterClass RC>
-  : STInstPI<(outs IntRegs:$_dst_),
-             (ins IntRegs:$src1, RC:$src2, IntRegs:$src3),
-  ".error \""#opc#"($src1++$src3:brev) = $src2\"",
-  [], "$_dst_ = $src1">;
-
-def S2_storerb_pbr_pseudo : T_store_pbr_pseudo <"memb", IntRegs>;
-def S2_storerh_pbr_pseudo : T_store_pbr_pseudo <"memh", IntRegs>;
-def S2_storeri_pbr_pseudo : T_store_pbr_pseudo <"memw", IntRegs>;
-def S2_storerf_pbr_pseudo : T_store_pbr_pseudo <"memh", IntRegs>;
-def S2_storerd_pbr_pseudo : T_store_pbr_pseudo <"memd", DoubleRegs>;
-
-//===----------------------------------------------------------------------===//
 // ST -
 //===----------------------------------------------------------------------===//
 
diff --git a/llvm/lib/Target/Hexagon/HexagonIntrinsics.td b/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
index b207aaf..a1bb77c 100644
--- a/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
+++ b/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
@@ -1251,7 +1251,6 @@
 def : T_PR_pat <S2_lsl_r_vw, int_hexagon_S2_lsl_r_vw>;
 
 // Vector shift words with truncate and pack
-
 def : T_PR_pat <S2_asr_r_svw_trun, int_hexagon_S2_asr_r_svw_trun>;
 
 def : T_R_pat<L2_loadw_locked, int_hexagon_L2_loadw_locked>;
@@ -1268,26 +1267,25 @@
 
 class T_stb_pat <InstHexagon MI, Intrinsic IntID, PatLeaf Val>
   : Pat<(IntID I32:$Rs, Val:$Rt, I32:$Ru),
-        (MI I32:$Rs, Val:$Rt, I32:$Ru)>;
+        (MI I32:$Rs, (A2_tfrrcr I32:$Ru), Val:$Rt)>;
 
-def : T_stb_pat <S2_storerh_pbr_pseudo, int_hexagon_brev_sth,   I32>;
-def : T_stb_pat <S2_storerb_pbr_pseudo, int_hexagon_brev_stb,   I32>;
-def : T_stb_pat <S2_storeri_pbr_pseudo, int_hexagon_brev_stw,   I32>;
-def : T_stb_pat <S2_storerf_pbr_pseudo, int_hexagon_brev_sthhi, I32>;
-def : T_stb_pat <S2_storerd_pbr_pseudo, int_hexagon_brev_std,   I64>;
+def : T_stb_pat <S2_storerh_pbr, int_hexagon_brev_sth,   I32>;
+def : T_stb_pat <S2_storerb_pbr, int_hexagon_brev_stb,   I32>;
+def : T_stb_pat <S2_storeri_pbr, int_hexagon_brev_stw,   I32>;
+def : T_stb_pat <S2_storerf_pbr, int_hexagon_brev_sthhi, I32>;
+def : T_stb_pat <S2_storerd_pbr, int_hexagon_brev_std,   I64>;
 
 class T_stc_pat <InstHexagon MI, Intrinsic IntID, PatLeaf Imm, PatLeaf Val>
   : Pat<(IntID I32:$Rs, Val:$Rt, I32:$Ru, Imm:$s),
-        (MI I32:$Rs, Val:$Rt, I32:$Ru, Imm:$s)>;
+        (MI I32:$Rs, Imm:$s, (A2_tfrrcr I32:$Ru), Val:$Rt)>;
 
-def: T_stc_pat<S2_storerb_pci_pseudo, int_hexagon_circ_stb,   s4_0ImmPred, I32>;
-def: T_stc_pat<S2_storerh_pci_pseudo, int_hexagon_circ_sth,   s4_1ImmPred, I32>;
-def: T_stc_pat<S2_storeri_pci_pseudo, int_hexagon_circ_stw,   s4_2ImmPred, I32>;
-def: T_stc_pat<S2_storerd_pci_pseudo, int_hexagon_circ_std,   s4_3ImmPred, I64>;
-def: T_stc_pat<S2_storerf_pci_pseudo, int_hexagon_circ_sthhi, s4_1ImmPred, I32>;
+def: T_stc_pat<S2_storerb_pci, int_hexagon_circ_stb,   s4_0ImmPred, I32>;
+def: T_stc_pat<S2_storerh_pci, int_hexagon_circ_sth,   s4_1ImmPred, I32>;
+def: T_stc_pat<S2_storeri_pci, int_hexagon_circ_stw,   s4_2ImmPred, I32>;
+def: T_stc_pat<S2_storerd_pci, int_hexagon_circ_std,   s4_3ImmPred, I64>;
+def: T_stc_pat<S2_storerf_pci, int_hexagon_circ_sthhi, s4_1ImmPred, I32>;
 
 include "HexagonIntrinsicsV3.td"
 include "HexagonIntrinsicsV4.td"
 include "HexagonIntrinsicsV5.td"
 include "HexagonIntrinsicsV60.td"
-
diff --git a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index 94ce0dd..d0be285 100644
--- a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -103,13 +103,16 @@
   Reserved.set(Hexagon::R30);
   Reserved.set(Hexagon::R31);
   Reserved.set(Hexagon::PC);
-  Reserved.set(Hexagon::GP);
   Reserved.set(Hexagon::D14);
   Reserved.set(Hexagon::D15);
   Reserved.set(Hexagon::LC0);
   Reserved.set(Hexagon::LC1);
   Reserved.set(Hexagon::SA0);
   Reserved.set(Hexagon::SA1);
+  Reserved.set(Hexagon::GP);
+  Reserved.set(Hexagon::CS0);
+  Reserved.set(Hexagon::CS1);
+  Reserved.set(Hexagon::CS);
   return Reserved;
 }