[mips][msa] Added bnz.df, bnz.v, bz.df, and bz.v

These intrinsics are legalized to V(ALL|ANY)_(NON)?ZERO nodes,
are matched as SN?Z_[BHWDV]_PSEUDO pseudos, and are emitted as
a branch/mov sequence that evaluates to 0 or 1.

Note: The resulting code is sub-optimal since it doesn't seem to be possible
to feed the result of an intrinsic directly into a brcond. At the moment
it uses (SETCC (VALL_ZERO $ws), 0, SETEQ) and similar, which unnecessarily
evaluates the boolean twice.

llvm-svn: 189478
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp
index c13f53a..4d1f329 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -208,6 +208,10 @@
   case MipsISD::SHRL_DSP:          return "MipsISD::SHRL_DSP";
   case MipsISD::SETCC_DSP:         return "MipsISD::SETCC_DSP";
   case MipsISD::SELECT_CC_DSP:     return "MipsISD::SELECT_CC_DSP";
+  case MipsISD::VALL_ZERO:         return "MipsISD::VALL_ZERO";
+  case MipsISD::VANY_ZERO:         return "MipsISD::VANY_ZERO";
+  case MipsISD::VALL_NONZERO:      return "MipsISD::VALL_NONZERO";
+  case MipsISD::VANY_NONZERO:      return "MipsISD::VANY_NONZERO";
   default:                         return NULL;
   }
 }
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.h b/llvm/lib/Target/Mips/MipsISelLowering.h
index 29671b0..4cc5a6a 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.h
+++ b/llvm/lib/Target/Mips/MipsISelLowering.h
@@ -152,6 +152,12 @@
       SETCC_DSP,
       SELECT_CC_DSP,
 
+      // Vector comparisons
+      VALL_ZERO,
+      VANY_ZERO,
+      VALL_NONZERO,
+      VANY_NONZERO,
+
       // Load/Store Left/Right nodes.
       LWL = ISD::FIRST_TARGET_MEMORY_OPCODE,
       LWR,
diff --git a/llvm/lib/Target/Mips/MipsMSAInstrFormats.td b/llvm/lib/Target/Mips/MipsMSAInstrFormats.td
index f337f9d..b01167471 100644
--- a/llvm/lib/Target/Mips/MipsMSAInstrFormats.td
+++ b/llvm/lib/Target/Mips/MipsMSAInstrFormats.td
@@ -119,3 +119,8 @@
   let Inst{25-21} = major;
   let Inst{5-0} = minor;
 }
+
+class MSA_VECS10_FMT<bits<5> major, bits<6> minor>: MSAInst {
+  let Inst{25-21} = major;
+  let Inst{5-0} = minor;
+}
diff --git a/llvm/lib/Target/Mips/MipsMSAInstrInfo.td b/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
index 53fceb7..1814b1c 100644
--- a/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
+++ b/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -11,6 +11,13 @@
 //
 //===----------------------------------------------------------------------===//
 
+def SDT_MipsVecCond : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>]>;
+
+def MipsVAllNonZero : SDNode<"MipsISD::VALL_NONZERO", SDT_MipsVecCond>;
+def MipsVAnyNonZero : SDNode<"MipsISD::VANY_NONZERO", SDT_MipsVecCond>;
+def MipsVAllZero : SDNode<"MipsISD::VALL_ZERO", SDT_MipsVecCond>;
+def MipsVAnyZero : SDNode<"MipsISD::VANY_ZERO", SDT_MipsVecCond>;
+
 def immSExt5 : ImmLeaf<i32, [{return isInt<5>(Imm);}]>;
 def immSExt10: ImmLeaf<i32, [{return isInt<10>(Imm);}]>;
 
@@ -147,7 +154,14 @@
 class BNEGI_W_ENC : MSA_BIT_W_FMT<0b101, 0b001001>;
 class BNEGI_D_ENC : MSA_BIT_D_FMT<0b101, 0b001001>;
 
+class BNZ_B_ENC : MSA_I10_FMT<0b000, 0b00, 0b001100>;
+class BNZ_H_ENC : MSA_I10_FMT<0b000, 0b01, 0b001100>;
+class BNZ_W_ENC : MSA_I10_FMT<0b000, 0b10, 0b001100>;
+class BNZ_D_ENC : MSA_I10_FMT<0b000, 0b11, 0b001100>;
+
+class BNZ_V_ENC : MSA_VECS10_FMT<0b01000, 0b011110>;
+
 class BSEL_V_ENC : MSA_VEC_FMT<0b00110, 0b011110>;
 
 class BSELI_B_ENC : MSA_I8_FMT<0b10, 0b000001>;
 
@@ -161,6 +175,13 @@
 class BSETI_W_ENC : MSA_BIT_W_FMT<0b100, 0b001001>;
 class BSETI_D_ENC : MSA_BIT_D_FMT<0b100, 0b001001>;
 
+class BZ_B_ENC : MSA_I10_FMT<0b001, 0b00, 0b001100>;
+class BZ_H_ENC : MSA_I10_FMT<0b001, 0b01, 0b001100>;
+class BZ_W_ENC : MSA_I10_FMT<0b001, 0b10, 0b001100>;
+class BZ_D_ENC : MSA_I10_FMT<0b001, 0b11, 0b001100>;
+
+class BZ_V_ENC : MSA_VECS10_FMT<0b01001, 0b011110>;
+
 class CEQ_B_ENC : MSA_3R_FMT<0b000, 0b00, 0b001111>;
 class CEQ_H_ENC : MSA_3R_FMT<0b000, 0b01, 0b001111>;
 class CEQ_W_ENC : MSA_3R_FMT<0b000, 0b10, 0b001111>;
@@ -875,6 +896,18 @@
                             RegisterClass RCWS,  RegisterClass RCWT = RCWS> :
   MSA_3R_4R_DESC_BASE<instr_asm, OpNode, itin, RCWD, RCWS, RCWT>;
 
+class MSA_CBRANCH_DESC_BASE<string instr_asm, RegisterClass RCWD> {
+  dag OutOperandList = (outs);
+  dag InOperandList = (ins RCWD:$wd, brtarget:$offset);
+  string AsmString = !strconcat(instr_asm, "\t$wd, $offset");
+  list<dag> Pattern = [];
+  InstrItinClass Itinerary = IIBranch;
+  bit isBranch = 1;
+  bit isTerminator = 1;
+  bit hasDelaySlot = 1;
+  list<Register> Defs = [AT];
+}
+
 class MSA_INSERT_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
                            InstrItinClass itin, RegisterClass RCD,
                            RegisterClass RCWS> {
@@ -1129,6 +1162,13 @@
 class BNEGI_D_DESC : MSA_BIT_D_DESC_BASE<"bnegi.d", int_mips_bnegi_d,
                                          NoItinerary, MSA128D, MSA128D>;
 
+class BNZ_B_DESC : MSA_CBRANCH_DESC_BASE<"bnz.b", MSA128B>;
+class BNZ_H_DESC : MSA_CBRANCH_DESC_BASE<"bnz.h", MSA128H>;
+class BNZ_W_DESC : MSA_CBRANCH_DESC_BASE<"bnz.w", MSA128W>;
+class BNZ_D_DESC : MSA_CBRANCH_DESC_BASE<"bnz.d", MSA128D>;
+
+class BNZ_V_DESC : MSA_CBRANCH_DESC_BASE<"bnz.v", MSA128B>;
+
 class BSEL_V_DESC : MSA_VEC_DESC_BASE<"bsel.v", int_mips_bsel_v, NoItinerary,
                                       MSA128B, MSA128B>;
 
@@ -1153,6 +1193,13 @@
 class BSETI_D_DESC : MSA_BIT_D_DESC_BASE<"bseti.d", int_mips_bseti_d,
                                          NoItinerary, MSA128D, MSA128D>;
 
+class BZ_B_DESC : MSA_CBRANCH_DESC_BASE<"bz.b", MSA128B>;
+class BZ_H_DESC : MSA_CBRANCH_DESC_BASE<"bz.h", MSA128H>;
+class BZ_W_DESC : MSA_CBRANCH_DESC_BASE<"bz.w", MSA128W>;
+class BZ_D_DESC : MSA_CBRANCH_DESC_BASE<"bz.d", MSA128D>;
+
+class BZ_V_DESC : MSA_CBRANCH_DESC_BASE<"bz.v", MSA128B>;
+
 class CEQ_B_DESC : MSA_3R_DESC_BASE<"ceq.b", int_mips_ceq_b, NoItinerary,
                                     MSA128B, MSA128B>, IsCommutable;
 class CEQ_H_DESC : MSA_3R_DESC_BASE<"ceq.h", int_mips_ceq_h, NoItinerary,
@@ -2344,6 +2391,13 @@
 def BNEGI_W : BNEGI_W_ENC, BNEGI_W_DESC, Requires<[HasMSA]>;
 def BNEGI_D : BNEGI_D_ENC, BNEGI_D_DESC, Requires<[HasMSA]>;
 
+def BNZ_B : BNZ_B_ENC, BNZ_B_DESC, Requires<[HasMSA]>;
+def BNZ_H : BNZ_H_ENC, BNZ_H_DESC, Requires<[HasMSA]>;
+def BNZ_W : BNZ_W_ENC, BNZ_W_DESC, Requires<[HasMSA]>;
+def BNZ_D : BNZ_D_ENC, BNZ_D_DESC, Requires<[HasMSA]>;
+
+def BNZ_V : BNZ_V_ENC, BNZ_V_DESC, Requires<[HasMSA]>;
+
 def BSEL_V : BSEL_V_ENC, BSEL_V_DESC, Requires<[HasMSA]>;
 
 def BSELI_B : BSELI_B_ENC, BSELI_B_DESC, Requires<[HasMSA]>;
@@ -2358,6 +2412,13 @@
 def BSETI_W : BSETI_W_ENC, BSETI_W_DESC, Requires<[HasMSA]>;
 def BSETI_D : BSETI_D_ENC, BSETI_D_DESC, Requires<[HasMSA]>;
 
+def BZ_B : BZ_B_ENC, BZ_B_DESC, Requires<[HasMSA]>;
+def BZ_H : BZ_H_ENC, BZ_H_DESC, Requires<[HasMSA]>;
+def BZ_W : BZ_W_ENC, BZ_W_DESC, Requires<[HasMSA]>;
+def BZ_D : BZ_D_ENC, BZ_D_DESC, Requires<[HasMSA]>;
+
+def BZ_V : BZ_V_ENC, BZ_V_DESC, Requires<[HasMSA]>;
+
 def CEQ_B : CEQ_B_ENC, CEQ_B_DESC, Requires<[HasMSA]>;
 def CEQ_H : CEQ_H_ENC, CEQ_H_DESC, Requires<[HasMSA]>;
 def CEQ_W : CEQ_W_ENC, CEQ_W_DESC, Requires<[HasMSA]>;
@@ -3117,3 +3178,35 @@
 def : MSABitconvertReverseHInDPat<v8f16, v2f64, MSA128H>;
 def : MSABitconvertReverseWInDPat<v4i32, v2f64, MSA128W>;
 def : MSABitconvertReverseWInDPat<v4f32, v2f64, MSA128W>;
+
+// Pseudos used to implement BNZ.df, BNZ.V, BZ.df, and BZ.V
+
+class MSA_CBRANCH_PSEUDO_DESC_BASE<SDPatternOperator OpNode, ValueType TyNode,
+                                   RegisterClass RCWS, InstrItinClass itin> :
+  MipsPseudo<(outs GPR32:$dst),
+             (ins RCWS:$ws),
+             [(set GPR32:$dst, (OpNode (TyNode RCWS:$ws)))]> {
+  bit usesCustomInserter = 1;
+}
+
+def SNZ_B_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllNonZero, v16i8,
+                                                MSA128B, NoItinerary>;
+def SNZ_H_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllNonZero, v8i16,
+                                                MSA128H, NoItinerary>;
+def SNZ_W_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllNonZero, v4i32,
+                                                MSA128W, NoItinerary>;
+def SNZ_D_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllNonZero, v2i64,
+                                                MSA128D, NoItinerary>;
+def SNZ_V_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAnyNonZero, v16i8,
+                                                MSA128B, NoItinerary>;
+
+def SZ_B_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllZero, v16i8,
+                                               MSA128B, NoItinerary>;
+def SZ_H_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllZero, v8i16,
+                                               MSA128H, NoItinerary>;
+def SZ_W_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllZero, v4i32,
+                                               MSA128W, NoItinerary>;
+def SZ_D_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllZero, v2i64,
+                                               MSA128D, NoItinerary>;
+def SZ_V_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAnyZero, v16i8,
+                                               MSA128B, NoItinerary>;
diff --git a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
index 5341277..9108211 100644
--- a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -125,6 +125,7 @@
   setTargetDAGCombine(ISD::SUBE);
   setTargetDAGCombine(ISD::MUL);
 
+  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
 
@@ -554,6 +555,26 @@
     return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB);
   case Mips::BPOSGE32_PSEUDO:
     return emitBPOSGE32(MI, BB);
+  case Mips::SNZ_B_PSEUDO:
+    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_B);
+  case Mips::SNZ_H_PSEUDO:
+    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_H);
+  case Mips::SNZ_W_PSEUDO:
+    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_W);
+  case Mips::SNZ_D_PSEUDO:
+    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_D);
+  case Mips::SNZ_V_PSEUDO:
+    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_V);
+  case Mips::SZ_B_PSEUDO:
+    return emitMSACBranchPseudo(MI, BB, Mips::BZ_B);
+  case Mips::SZ_H_PSEUDO:
+    return emitMSACBranchPseudo(MI, BB, Mips::BZ_H);
+  case Mips::SZ_W_PSEUDO:
+    return emitMSACBranchPseudo(MI, BB, Mips::BZ_W);
+  case Mips::SZ_D_PSEUDO:
+    return emitMSACBranchPseudo(MI, BB, Mips::BZ_D);
+  case Mips::SZ_V_PSEUDO:
+    return emitMSACBranchPseudo(MI, BB, Mips::BZ_V);
   }
 }
 
@@ -690,6 +711,16 @@
   return DAG.getMergeValues(Vals, 2, DL);
 }
 
+static SDValue lowerMSABranchIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
+  SDLoc DL(Op);
+  SDValue Value = Op->getOperand(1);  // operand 0 holds the intrinsic ID
+  EVT ResTy = Op->getValueType(0);
+  // Rebuild the intrinsic as node Opc: same result type, operand 1 as input.
+  SDValue Result = DAG.getNode(Opc, DL, ResTy, Value);
+
+  return Result;
+}
+
 SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                       SelectionDAG &DAG) const {
   switch (cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue()) {
@@ -727,6 +758,20 @@
     return lowerDSPIntr(Op, DAG, MipsISD::MSub);
   case Intrinsic::mips_msubu:
     return lowerDSPIntr(Op, DAG, MipsISD::MSubu);
+  case Intrinsic::mips_bnz_b:
+  case Intrinsic::mips_bnz_h:
+  case Intrinsic::mips_bnz_w:
+  case Intrinsic::mips_bnz_d:
+    return lowerMSABranchIntr(Op, DAG, MipsISD::VALL_NONZERO);
+  case Intrinsic::mips_bnz_v:
+    return lowerMSABranchIntr(Op, DAG, MipsISD::VANY_NONZERO);
+  case Intrinsic::mips_bz_b:
+  case Intrinsic::mips_bz_h:
+  case Intrinsic::mips_bz_w:
+  case Intrinsic::mips_bz_d:
+    return lowerMSABranchIntr(Op, DAG, MipsISD::VALL_ZERO);
+  case Intrinsic::mips_bz_v:
+    return lowerMSABranchIntr(Op, DAG, MipsISD::VANY_ZERO);
   }
 }
 
@@ -830,7 +875,7 @@
   case Intrinsic::mips_stx_h:
   case Intrinsic::mips_stx_w:
   case Intrinsic::mips_stx_d:
-   return lowerMSAStoreIntr(Op, DAG, Intr);
+    return lowerMSAStoreIntr(Op, DAG, Intr);
   }
 }
 
@@ -896,3 +941,70 @@
   MI->eraseFromParent();   // The pseudo instruction is gone now.
   return Sink;
 }
+
+MachineBasicBlock * MipsSETargetLowering::
+emitMSACBranchPseudo(MachineInstr *MI, MachineBasicBlock *BB,
+                     unsigned BranchOp) const{
+  // Expands the pseudo MI into a diamond that materialises 0 or 1 in $rd:
+  //  $bb:   <pseudo> $rd, $ws
+  //  =>
+  //  $bb:   BranchOp $ws, $tbb    (no branch to $fbb is emitted; $fbb is
+  //                                inserted directly after $bb)
+  //  $fbb:  addiu $rd1, $zero, 0
+  //         b $sink
+  //  $tbb:  addiu $rd2, $zero, 1  (no branch to $sink is emitted; $sink is
+  //                                inserted directly after $tbb)
+  //  $sink: $rd = phi($rd1, $fbb, $rd2, $tbb)
+  //
+  // Returns $sink, which receives the instructions that followed MI in $bb
+  // together with $bb's successor edges.
+
+  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  const TargetRegisterClass *RC = &Mips::GPR32RegClass;
+  DebugLoc DL = MI->getDebugLoc();
+  const BasicBlock *LLVM_BB = BB->getBasicBlock();
+  MachineFunction::iterator It = llvm::next(MachineFunction::iterator(BB));
+  MachineFunction *F = BB->getParent();
+  MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *Sink  = F->CreateMachineBasicBlock(LLVM_BB);
+  F->insert(It, FBB);
+  F->insert(It, TBB);
+  F->insert(It, Sink);
+
+  // Move everything after MI, plus BB's successor edges, over to Sink.
+  Sink->splice(Sink->begin(), BB, llvm::next(MachineBasicBlock::iterator(MI)),
+               BB->end());
+  Sink->transferSuccessorsAndUpdatePHIs(BB);
+
+  // Wire up the diamond: BB -> {FBB, TBB} -> Sink.
+  BB->addSuccessor(FBB);
+  BB->addSuccessor(TBB);
+  FBB->addSuccessor(Sink);
+  TBB->addSuccessor(Sink);
+
+  // Append the real branch (BranchOp) on $ws to $BB, targeting $TBB.
+  BuildMI(BB, DL, TII->get(BranchOp))
+    .addReg(MI->getOperand(1).getReg())
+    .addMBB(TBB);
+
+  // Fill $FBB: materialise 0, then branch to $Sink.
+  unsigned RD1 = RegInfo.createVirtualRegister(RC);
+  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), RD1)
+    .addReg(Mips::ZERO).addImm(0);
+  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);
+
+  // Fill $TBB: materialise 1.
+  unsigned RD2 = RegInfo.createVirtualRegister(RC);
+  BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), RD2)
+    .addReg(Mips::ZERO).addImm(1);
+
+  // Insert the phi at the start of $Sink, defining the pseudo's result.
+  BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
+          MI->getOperand(0).getReg())
+    .addReg(RD1).addMBB(FBB).addReg(RD2).addMBB(TBB);
+
+  MI->eraseFromParent();   // The pseudo instruction is gone now.
+  return Sink;
+}
diff --git a/llvm/lib/Target/Mips/MipsSEISelLowering.h b/llvm/lib/Target/Mips/MipsSEISelLowering.h
index de43092..d1a18e1 100644
--- a/llvm/lib/Target/Mips/MipsSEISelLowering.h
+++ b/llvm/lib/Target/Mips/MipsSEISelLowering.h
@@ -67,6 +67,9 @@
 
     MachineBasicBlock *emitBPOSGE32(MachineInstr *MI,
                                     MachineBasicBlock *BB) const;
+    MachineBasicBlock *emitMSACBranchPseudo(MachineInstr *MI,
+                                            MachineBasicBlock *BB,
+                                            unsigned BranchOp) const;
   };
 }