[SystemZ] Postpone NI->RISBG conversion to convertToThreeAddress()

r186399 aggressively used the RISBG instruction for immediate ANDs,
both because it can handle some values that AND IMMEDIATE can't,
and because it allows the destination register to be different from
the source.  I realized later while implementing the distinct-ops
support that it would be better to leave the choice up to
convertToThreeAddress() instead.  The AND IMMEDIATE form is shorter
and is less likely to be cracked.

Using RISBG is a problem for 32-bit ANDs, though, because we assume
that all 32-bit operations will leave the high word untouched, whereas
RISBG used in this way will either clear the high word or copy it from
the source register.  The patch uses the z196 instruction RISBLG for
32-bit ANDs instead.

This means that z10 will be restricted to NILL, NILH and NILF for
32-bit ANDs, but I think that should be OK for now.  Although we're
using z10 as the base architecture, the optimization work is going
to be focused more on z196 and zEC12.

llvm-svn: 187492
diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 8866253..b7e966f 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -132,6 +132,14 @@
     return CurDAG->getTargetConstant(Imm, Node->getValueType(0));
   }
 
+  const SystemZTargetMachine &getTargetMachine() const {
+    return static_cast<const SystemZTargetMachine &>(TM);
+  }
+
+  const SystemZInstrInfo *getInstrInfo() const {
+    return getTargetMachine().getInstrInfo();
+  }
+
   // Try to fold more of the base or index of AM into AM, where IsBase
   // selects between the base and index.
   bool expandAddress(SystemZAddressingMode &AM, bool IsBase);
@@ -236,6 +244,10 @@
   // set Op to that Y.
   bool detectOrAndInsertion(SDValue &Op, uint64_t InsertMask);
 
+  // Try to update RxSBG so that only the bits of RxSBG.Input in Mask are used.
+  // Return true on success.
+  bool refineRxSBGMask(RxSBGOperands &RxSBG, uint64_t Mask);
+
   // Try to fold some of RxSBG.Input into other fields of RxSBG.
   // Return true on success.
   bool expandRxSBG(RxSBGOperands &RxSBG);
@@ -607,52 +619,15 @@
   return true;
 }
 
-// Return true if Mask matches the regexp 0*1+0*, given that zero masks
-// have already been filtered out.  Store the first set bit in LSB and
-// the number of set bits in Length if so.
-static bool isStringOfOnes(uint64_t Mask, unsigned &LSB, unsigned &Length) {
-  unsigned First = findFirstSet(Mask);
-  uint64_t Top = (Mask >> First) + 1;
-  if ((Top & -Top) == Top) {
-    LSB = First;
-    Length = findFirstSet(Top);
-    return true;
-  }
-  return false;
-}
-
-// Try to update RxSBG so that only the bits of RxSBG.Input in Mask are used.
-// Return true on success.
-static bool refineRxSBGMask(RxSBGOperands &RxSBG, uint64_t Mask) {
+bool SystemZDAGToDAGISel::refineRxSBGMask(RxSBGOperands &RxSBG, uint64_t Mask) {
+  const SystemZInstrInfo *TII = getInstrInfo();
   if (RxSBG.Rotate != 0)
     Mask = (Mask << RxSBG.Rotate) | (Mask >> (64 - RxSBG.Rotate));
   Mask &= RxSBG.Mask;
-
-  // Reject trivial all-zero masks.
-  if (Mask == 0)
-    return false;
-
-  // Handle the 1+0+ or 0+1+0* cases.  Start then specifies the index of
-  // the msb and End specifies the index of the lsb.
-  unsigned LSB, Length;
-  if (isStringOfOnes(Mask, LSB, Length)) {
+  if (TII->isRxSBGMask(Mask, RxSBG.BitSize, RxSBG.Start, RxSBG.End)) {
     RxSBG.Mask = Mask;
-    RxSBG.Start = 63 - (LSB + Length - 1);
-    RxSBG.End = 63 - LSB;
     return true;
   }
-
-  // Handle the wrap-around 1+0+1+ cases.  Start then specifies the msb
-  // of the low 1s and End specifies the lsb of the high 1s.
-  if (isStringOfOnes(Mask ^ allOnes(RxSBG.BitSize), LSB, Length)) {
-    assert(LSB > 0 && "Bottom bit must be set");
-    assert(LSB + Length < RxSBG.BitSize && "Top bit must be set");
-    RxSBG.Mask = Mask;
-    RxSBG.Start = 63 - (LSB - 1);
-    RxSBG.End = 63 - (LSB + Length);
-    return true;
-  }
-
   return false;
 }
 
@@ -824,24 +799,38 @@
 }
 
 SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) {
+  EVT VT = N->getValueType(0);
   RxSBGOperands RISBG(SystemZ::RISBG, SDValue(N, 0));
   unsigned Count = 0;
   while (expandRxSBG(RISBG))
     Count += 1;
-  // Prefer to use normal shift instructions over RISBG, since they can handle
-  // all cases and are sometimes shorter.  Prefer to use RISBG for ANDs though,
-  // since it is effectively a three-operand instruction in this case,
-  // and since it can handle some masks that AND IMMEDIATE can't.
-  if (Count < (N->getOpcode() == ISD::AND ? 1U : 2U))
+  if (Count == 0)
     return 0;
+  if (Count == 1) {
+    // Prefer to use normal shift instructions over RISBG, since they can handle
+    // all cases and are sometimes shorter.
+    if (N->getOpcode() != ISD::AND)
+      return 0;
 
-  // Prefer register extensions like LLC over RISBG.
-  if (RISBG.Rotate == 0 &&
-      (RISBG.Start == 32 || RISBG.Start == 48 || RISBG.Start == 56) &&
-      RISBG.End == 63)
-    return 0;
+    // Prefer register extensions like LLC over RISBG.  Also prefer to start
+    // out with normal ANDs if one instruction would be enough.  We can convert
+    // these ANDs into an RISBG later if a three-address instruction is useful.
+    if (VT == MVT::i32 ||
+        RISBG.Mask == 0xff ||
+        RISBG.Mask == 0xffff ||
+        SystemZ::isImmLF(~RISBG.Mask) ||
+        SystemZ::isImmHF(~RISBG.Mask)) {
+      // Force the new mask into the DAG, since it may include known-one bits.
+      ConstantSDNode *MaskN = cast<ConstantSDNode>(N->getOperand(1).getNode());
+      if (MaskN->getZExtValue() != RISBG.Mask) {
+        SDValue NewMask = CurDAG->getConstant(RISBG.Mask, VT);
+        N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), NewMask);
+        return SelectCode(N);
+      }
+      return 0;
+    }
+  }  
 
-  EVT VT = N->getValueType(0);
   SDValue Ops[5] = {
     getUNDEF64(SDLoc(N)),
     convertTo(SDLoc(N), MVT::i64, RISBG.Input),
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 26ea086..12211fe 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -23,6 +23,11 @@
 
 using namespace llvm;
 
+// Return a mask with Count low bits set.
+static uint64_t allOnes(unsigned int Count) {
+  return Count == 0 ? 0 : (uint64_t(1) << (Count - 1) << 1) - 1;
+}
+
 SystemZInstrInfo::SystemZInstrInfo(SystemZTargetMachine &tm)
   : SystemZGenInstrInfo(SystemZ::ADJCALLSTACKDOWN, SystemZ::ADJCALLSTACKUP),
     RI(tm), TM(tm) {
@@ -507,6 +512,49 @@
           MI->getOperand(3).getReg() == 0);
 }
 
+namespace {
+  struct LogicOp {
+    LogicOp() : RegSize(0), ImmLSB(0), ImmSize(0) {}
+    LogicOp(unsigned regSize, unsigned immLSB, unsigned immSize)
+      : RegSize(regSize), ImmLSB(immLSB), ImmSize(immSize) {}
+
+    operator bool() const { return RegSize; }
+
+    unsigned RegSize, ImmLSB, ImmSize;
+  };
+}
+
+static LogicOp interpretAndImmediate(unsigned Opcode) {
+  switch (Opcode) {
+  case SystemZ::NILL32: return LogicOp(32,  0, 16);
+  case SystemZ::NILH32: return LogicOp(32, 16, 16);
+  case SystemZ::NILL:   return LogicOp(64,  0, 16);
+  case SystemZ::NILH:   return LogicOp(64, 16, 16);
+  case SystemZ::NIHL:   return LogicOp(64, 32, 16);
+  case SystemZ::NIHH:   return LogicOp(64, 48, 16);
+  case SystemZ::NILF32: return LogicOp(32,  0, 32);
+  case SystemZ::NILF:   return LogicOp(64,  0, 32);
+  case SystemZ::NIHF:   return LogicOp(64, 32, 32);
+  default:              return LogicOp();
+  }
+}
+
+// Used to return from convertToThreeAddress after replacing two-address
+// instruction OldMI with three-address instruction NewMI.
+static MachineInstr *finishConvertToThreeAddress(MachineInstr *OldMI,
+                                                 MachineInstr *NewMI,
+                                                 LiveVariables *LV) {
+  if (LV) {
+    unsigned NumOps = OldMI->getNumOperands();
+    for (unsigned I = 1; I < NumOps; ++I) {
+      MachineOperand &Op = OldMI->getOperand(I);
+      if (Op.isReg() && Op.isKill())
+        LV->replaceKillInstruction(Op.getReg(), OldMI, NewMI);
+    }
+  }
+  return NewMI;
+}
+
 MachineInstr *
 SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
                                         MachineBasicBlock::iterator &MBBI,
@@ -524,26 +572,50 @@
   if (TM.getSubtargetImpl()->hasDistinctOps()) {
     int ThreeOperandOpcode = SystemZ::getThreeOperandOpcode(Opcode);
     if (ThreeOperandOpcode >= 0) {
-      unsigned DestReg = MI->getOperand(0).getReg();
+      MachineOperand &Dest = MI->getOperand(0);
       MachineOperand &Src = MI->getOperand(1);
-      MachineInstrBuilder MIB = BuildMI(*MBB, MBBI, MI->getDebugLoc(),
-                                        get(ThreeOperandOpcode), DestReg);
+      MachineInstrBuilder MIB =
+        BuildMI(*MBB, MBBI, MI->getDebugLoc(), get(ThreeOperandOpcode))
+        .addOperand(Dest);
       // Keep the kill state, but drop the tied flag.
-      MIB.addReg(Src.getReg(), getKillRegState(Src.isKill()));
+      MIB.addReg(Src.getReg(), getKillRegState(Src.isKill()), Src.getSubReg());
       // Keep the remaining operands as-is.
       for (unsigned I = 2; I < NumOps; ++I)
         MIB.addOperand(MI->getOperand(I));
-      MachineInstr *NewMI = MIB;
+      return finishConvertToThreeAddress(MI, MIB, LV);
+    }
+  }
 
-      // Transfer killing information to the new instruction.
-      if (LV) {
-        for (unsigned I = 1; I < NumOps; ++I) {
-          MachineOperand &Op = MI->getOperand(I);
-          if (Op.isReg() && Op.isKill())
-            LV->replaceKillInstruction(Op.getReg(), MI, NewMI);
+  // Try to convert an AND into an RISBG-type instruction.
+  if (LogicOp And = interpretAndImmediate(Opcode)) {
+    unsigned NewOpcode;
+    if (And.RegSize == 64)
+      NewOpcode = SystemZ::RISBG;
+    else if (TM.getSubtargetImpl()->hasHighWord())
+      NewOpcode = SystemZ::RISBLG32;
+    else
+      // We can't use RISBG for 32-bit operations because it clobbers the
+      // high word of the destination too.
+      NewOpcode = 0;
+    if (NewOpcode) {
+      uint64_t Imm = MI->getOperand(2).getImm() << And.ImmLSB;
+      // AND IMMEDIATE leaves the other bits of the register unchanged.
+      Imm |= allOnes(And.RegSize) & ~(allOnes(And.ImmSize) << And.ImmLSB);
+      unsigned Start, End;
+      if (isRxSBGMask(Imm, And.RegSize, Start, End)) {
+        if (NewOpcode == SystemZ::RISBLG32) {
+          Start &= 31;
+          End &= 31;
         }
+        MachineOperand &Dest = MI->getOperand(0);
+        MachineOperand &Src = MI->getOperand(1);
+        MachineInstrBuilder MIB =
+          BuildMI(*MBB, MI, MI->getDebugLoc(), get(NewOpcode))
+          .addOperand(Dest).addReg(0)
+          .addReg(Src.getReg(), getKillRegState(Src.isKill()), Src.getSubReg())
+          .addImm(Start).addImm(End + 128).addImm(0);
+        return finishConvertToThreeAddress(MI, MIB, LV);
       }
-      return MIB;
     }
   }
   return 0;
@@ -775,6 +847,48 @@
   return 0;
 }
 
+// Return true if Mask matches the regexp 0*1+0*, given that zero masks
+// have already been filtered out.  Store the first set bit in LSB and
+// the number of set bits in Length if so.
+static bool isStringOfOnes(uint64_t Mask, unsigned &LSB, unsigned &Length) {
+  unsigned First = findFirstSet(Mask);
+  uint64_t Top = (Mask >> First) + 1;
+  if ((Top & -Top) == Top) {
+    LSB = First;
+    Length = findFirstSet(Top);
+    return true;
+  }
+  return false;
+}
+
+bool SystemZInstrInfo::isRxSBGMask(uint64_t Mask, unsigned BitSize,
+                                   unsigned &Start, unsigned &End) const {
+  // Reject trivial all-zero masks.
+  if (Mask == 0)
+    return false;
+
+  // Handle the 1+0+ or 0+1+0* cases.  Start then specifies the index of
+  // the msb and End specifies the index of the lsb.
+  unsigned LSB, Length;
+  if (isStringOfOnes(Mask, LSB, Length)) {
+    Start = 63 - (LSB + Length - 1);
+    End = 63 - LSB;
+    return true;
+  }
+
+  // Handle the wrap-around 1+0+1+ cases.  Start then specifies the msb
+  // of the low 1s and End specifies the lsb of the high 1s.
+  if (isStringOfOnes(Mask ^ allOnes(BitSize), LSB, Length)) {
+    assert(LSB > 0 && "Bottom bit must be set");
+    assert(LSB + Length < BitSize && "Top bit must be set");
+    Start = 63 - (LSB - 1);
+    End = 63 - (LSB + Length);
+    return true;
+  }
+
+  return false;
+}
+
 unsigned SystemZInstrInfo::getCompareAndBranch(unsigned Opcode,
                                                const MachineInstr *MI) const {
   switch (Opcode) {
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
index 7d11f39..7fc0ca9 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -187,6 +187,12 @@
   // exists.
   unsigned getOpcodeForOffset(unsigned Opcode, int64_t Offset) const;
 
+  // Return true if ROTATE AND ... SELECTED BITS can be used to select bits
+  // Mask of the R2 operand, given that only the low BitSize bits of Mask are
+  // significant.  Set Start and End to the I3 and I4 operands if so.
+  bool isRxSBGMask(uint64_t Mask, unsigned BitSize,
+                   unsigned &Start, unsigned &End) const;
+
   // If Opcode is a COMPARE opcode for which an associated COMPARE AND
   // BRANCH exists, return the opcode for the latter, otherwise return 0.
   // MI, if nonnull, is the compare instruction.
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index 6386d16..b3ea36d 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -692,21 +692,23 @@
     defm NGR : BinaryRREAndK<"ng", 0xB980, 0xB9E4, and, GR64, GR64>;
   }
 
-  // ANDs of a 16-bit immediate, leaving other bits unaffected.
-  let isCodeGenOnly = 1 in {
-    def NILL32 : BinaryRI<"nill", 0xA57, and, GR32, imm32ll16c>;
-    def NILH32 : BinaryRI<"nilh", 0xA56, and, GR32, imm32lh16c>;
-  }
-  def NILL : BinaryRI<"nill", 0xA57, and, GR64, imm64ll16c>;
-  def NILH : BinaryRI<"nilh", 0xA56, and, GR64, imm64lh16c>;
-  def NIHL : BinaryRI<"nihl", 0xA55, and, GR64, imm64hl16c>;
-  def NIHH : BinaryRI<"nihh", 0xA54, and, GR64, imm64hh16c>;
+  let isConvertibleToThreeAddress = 1 in {
+    // ANDs of a 16-bit immediate, leaving other bits unaffected.
+    let isCodeGenOnly = 1 in {
+      def NILL32 : BinaryRI<"nill", 0xA57, and, GR32, imm32ll16c>;
+      def NILH32 : BinaryRI<"nilh", 0xA56, and, GR32, imm32lh16c>;
+    }
+    def NILL : BinaryRI<"nill", 0xA57, and, GR64, imm64ll16c>;
+    def NILH : BinaryRI<"nilh", 0xA56, and, GR64, imm64lh16c>;
+    def NIHL : BinaryRI<"nihl", 0xA55, and, GR64, imm64hl16c>;
+    def NIHH : BinaryRI<"nihh", 0xA54, and, GR64, imm64hh16c>;
 
-  // ANDs of a 32-bit immediate, leaving other bits unaffected.
-  let isCodeGenOnly = 1 in
-    def NILF32 : BinaryRIL<"nilf", 0xC0B, and, GR32, uimm32>;
-  def NILF : BinaryRIL<"nilf", 0xC0B, and, GR64, imm64lf32c>;
-  def NIHF : BinaryRIL<"nihf", 0xC0A, and, GR64, imm64hf32c>;
+    // ANDs of a 32-bit immediate, leaving other bits unaffected.
+    let isCodeGenOnly = 1 in
+      def NILF32 : BinaryRIL<"nilf", 0xC0B, and, GR32, uimm32>;
+    def NILF : BinaryRIL<"nilf", 0xC0B, and, GR64, imm64lf32c>;
+    def NIHF : BinaryRIL<"nihf", 0xC0A, and, GR64, imm64hf32c>;
+  }
 
   // ANDs of memory.
   defm N  : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load, 4>;
@@ -869,6 +871,9 @@
 
 // Forms of RISBG that only affect one word of the destination register.
 // They do not set CC.
+let isCodeGenOnly = 1 in
+  def RISBLG32 : RotateSelectRIEf<"risblg", 0xEC51, GR32, GR32>,
+                 Requires<[FeatureHighWord]>;
 def RISBHG : RotateSelectRIEf<"risbhg", 0xEC5D, GR64, GR64>,
              Requires<[FeatureHighWord]>;
 def RISBLG : RotateSelectRIEf<"risblg", 0xEC51, GR64, GR64>,