[PowerPC] Eliminate sign- and zero-extensions if already sign- or zero-extended

This patch enables redundant sign- and zero-extension elimination in PowerPC MI Peephole pass.
If the input value of a sign- or zero-extension is known to be already sign- or zero-extended, the operation is redundant and can be eliminated.
One common case is sign-extensions for a method parameter or for a method return value; they must be sign- or zero-extended as defined in PPC ELF ABI. 
For example of the following simple code, two extsw instructions are generated before the invocation of int_func and before the return. With this patch, both extsw are eliminated.

void int_func(int);
void ii_test(int a) {
    if (a & 1) return int_func(a);
}

Such redundant sign- or zero-extensions are quite common in many programs; e.g. I observed about 60,000 occurrences of the elimination while compiling the LLVM+CLANG.

Differential Revision: https://reviews.llvm.org/D31319

llvm-svn: 315888
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index a71841f..9da20d9 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -260,6 +260,7 @@
   switch (MI.getOpcode()) {
   default: return false;
   case PPC::EXTSW:
+  case PPC::EXTSW_32:
   case PPC::EXTSW_32_64:
     SrcReg = MI.getOperand(1).getReg();
     DstReg = MI.getOperand(0).getReg();
@@ -2103,3 +2104,241 @@
 int PPCInstrInfo::getRecordFormOpcode(unsigned Opcode) {
   return PPC::getRecordFormOpcode(Opcode);
 }
+
+// This function returns true if the machine instruction
+// always outputs a value by sign-extending a 32 bit value,
+// i.e. 0 to 31-th bits are same as 32-th bit.
+static bool isSignExtendingOp(const MachineInstr &MI) {
+  int Opcode = MI.getOpcode();
+  if (Opcode == PPC::LI     || Opcode == PPC::LI8     ||
+      Opcode == PPC::LIS    || Opcode == PPC::LIS8    ||
+      Opcode == PPC::SRAW   || Opcode == PPC::SRAWo   ||
+      Opcode == PPC::SRAWI  || Opcode == PPC::SRAWIo  ||
+      Opcode == PPC::LWA    || Opcode == PPC::LWAX    ||
+      Opcode == PPC::LWA_32 || Opcode == PPC::LWAX_32 ||
+      Opcode == PPC::LHA    || Opcode == PPC::LHAX    ||
+      Opcode == PPC::LHA8   || Opcode == PPC::LHAX8   ||
+      Opcode == PPC::LBZ    || Opcode == PPC::LBZX    ||
+      Opcode == PPC::LBZ8   || Opcode == PPC::LBZX8   ||
+      Opcode == PPC::LBZU   || Opcode == PPC::LBZUX   ||
+      Opcode == PPC::LBZU8  || Opcode == PPC::LBZUX8  ||
+      Opcode == PPC::LHZ    || Opcode == PPC::LHZX    ||
+      Opcode == PPC::LHZ8   || Opcode == PPC::LHZX8   ||
+      Opcode == PPC::LHZU   || Opcode == PPC::LHZUX   ||
+      Opcode == PPC::LHZU8  || Opcode == PPC::LHZUX8  ||
+      Opcode == PPC::EXTSB  || Opcode == PPC::EXTSBo  ||
+      Opcode == PPC::EXTSH  || Opcode == PPC::EXTSHo  ||
+      Opcode == PPC::EXTSB8 || Opcode == PPC::EXTSH8  ||
+      Opcode == PPC::EXTSW  || Opcode == PPC::EXTSWo  ||
+      Opcode == PPC::EXTSH8_32_64 || Opcode == PPC::EXTSW_32_64 ||
+      Opcode == PPC::EXTSB8_32_64)
+    return true;
+
+  if (Opcode == PPC::RLDICL && MI.getOperand(3).getImm() >= 33)
+    return true;
+
+  if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINMo ||
+       Opcode == PPC::RLWNM  || Opcode == PPC::RLWNMo) &&
+      MI.getOperand(3).getImm() > 0 &&
+      MI.getOperand(3).getImm() <= MI.getOperand(4).getImm())
+    return true;
+
+  return false;
+}
+
+// This function returns true if the machine instruction
+// always outputs zeros in higher 32 bits.
+static bool isZeroExtendingOp(const MachineInstr &MI) {
+  int Opcode = MI.getOpcode();
+  // The 16-bit immediate is sign-extended in li/lis.
+  // If the most significant bit is zero, all higher bits are zero.
+  if (Opcode == PPC::LI  || Opcode == PPC::LI8 ||
+      Opcode == PPC::LIS || Opcode == PPC::LIS8) {
+    int64_t Imm = MI.getOperand(1).getImm();
+    if (((uint64_t)Imm & ~0x7FFFuLL) == 0)
+      return true;
+  }
+
+  // We have some variations of rotate-and-mask instructions
+  // that clear higher 32-bits.
+  if ((Opcode == PPC::RLDICL || Opcode == PPC::RLDICLo ||
+       Opcode == PPC::RLDCL  || Opcode == PPC::RLDCLo  ||
+       Opcode == PPC::RLDICL_32_64) &&
+      MI.getOperand(3).getImm() >= 32)
+    return true;
+
+  if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDICo) &&
+      MI.getOperand(3).getImm() >= 32 &&
+      MI.getOperand(3).getImm() <= 63 - MI.getOperand(2).getImm())
+    return true;
+
+  if ((Opcode == PPC::RLWINM  || Opcode == PPC::RLWINMo ||
+       Opcode == PPC::RLWNM   || Opcode == PPC::RLWNMo  ||
+       Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
+      MI.getOperand(3).getImm() <= MI.getOperand(4).getImm())
+    return true;
+
+  // There are other instructions that clear higher 32-bits.
+  if (Opcode == PPC::CNTLZW  || Opcode == PPC::CNTLZWo ||
+      Opcode == PPC::CNTTZW  || Opcode == PPC::CNTTZWo ||
+      Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8 ||
+      Opcode == PPC::CNTLZD  || Opcode == PPC::CNTLZDo ||
+      Opcode == PPC::CNTTZD  || Opcode == PPC::CNTTZDo ||
+      Opcode == PPC::POPCNTD || Opcode == PPC::POPCNTW ||
+      Opcode == PPC::SLW     || Opcode == PPC::SLWo    ||
+      Opcode == PPC::SRW     || Opcode == PPC::SRWo    ||
+      Opcode == PPC::SLW8    || Opcode == PPC::SRW8    ||
+      Opcode == PPC::SLWI    || Opcode == PPC::SLWIo   ||
+      Opcode == PPC::SRWI    || Opcode == PPC::SRWIo   ||
+      Opcode == PPC::LWZ     || Opcode == PPC::LWZX    ||
+      Opcode == PPC::LWZU    || Opcode == PPC::LWZUX   ||
+      Opcode == PPC::LWBRX   || Opcode == PPC::LHBRX   ||
+      Opcode == PPC::LHZ     || Opcode == PPC::LHZX    ||
+      Opcode == PPC::LHZU    || Opcode == PPC::LHZUX   ||
+      Opcode == PPC::LBZ     || Opcode == PPC::LBZX    ||
+      Opcode == PPC::LBZU    || Opcode == PPC::LBZUX   ||
+      Opcode == PPC::LWZ8    || Opcode == PPC::LWZX8   ||
+      Opcode == PPC::LWZU8   || Opcode == PPC::LWZUX8  ||
+      Opcode == PPC::LWBRX8  || Opcode == PPC::LHBRX8  ||
+      Opcode == PPC::LHZ8    || Opcode == PPC::LHZX8   ||
+      Opcode == PPC::LHZU8   || Opcode == PPC::LHZUX8  ||
+      Opcode == PPC::LBZ8    || Opcode == PPC::LBZX8   ||
+      Opcode == PPC::LBZU8   || Opcode == PPC::LBZUX8  ||
+      Opcode == PPC::ANDIo   || Opcode == PPC::ANDISo  ||
+      Opcode == PPC::ROTRWI  || Opcode == PPC::ROTRWIo ||
+      Opcode == PPC::EXTLWI  || Opcode == PPC::EXTLWIo ||
+      Opcode == PPC::MFVSRWZ)
+    return true;
+
+  return false;
+}
+
+// We limit the max depth to track incoming values of PHIs or binary ops
+// (e.g. AND) to avoid exsessive cost.
+const unsigned MAX_DEPTH = 1;
+
+bool
+PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt,
+                                   const unsigned Depth) const {
+  const MachineFunction *MF = MI.getParent()->getParent();
+  const MachineRegisterInfo *MRI = &MF->getRegInfo();
+
+  switch (MI.getOpcode()) {
+  case PPC::COPY: {
+    unsigned SrcReg = MI.getOperand(1).getReg();
+
+    // In both ELFv1 and v2 ABI, method parameters and the return value
+    // are sign- or zero-extended.
+    if (MF->getSubtarget<PPCSubtarget>().isSVR4ABI()) {
+      const PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
+      // We check the ZExt/SExt flags for a method parameter.
+      if (MI.getParent()->getBasicBlock() ==
+          &MF->getFunction()->getEntryBlock()) {
+        unsigned VReg = MI.getOperand(0).getReg();
+        if (MF->getRegInfo().isLiveIn(VReg))
+          return SignExt ? FuncInfo->isLiveInSExt(VReg) :
+                           FuncInfo->isLiveInZExt(VReg);
+      }
+
+      // For a method return value, we check the ZExt/SExt flags in attribute.
+      // We assume the following code sequence for method call.
+      //   ADJCALLSTACKDOWN 32, %R1<imp-def,dead>, %R1<imp-use>
+      //   BL8_NOP <ga:@func>,...
+      //   ADJCALLSTACKUP 32, 0, %R1<imp-def,dead>, %R1<imp-use>
+      //   %vreg5<def> = COPY %X3; G8RC:%vreg5
+      if (SrcReg == PPC::X3) {
+        const MachineBasicBlock *MBB = MI.getParent();
+        MachineBasicBlock::const_instr_iterator II =
+          MachineBasicBlock::const_instr_iterator(&MI);
+        if (II != MBB->instr_begin() &&
+            (--II)->getOpcode() == PPC::ADJCALLSTACKUP) {
+          const MachineInstr &CallMI = *(--II);
+          if (CallMI.isCall() && CallMI.getOperand(0).isGlobal()) {
+            const Function *CalleeFn =
+              dyn_cast<Function>(CallMI.getOperand(0).getGlobal());
+            const IntegerType *IntTy =
+              dyn_cast<IntegerType>(CalleeFn->getReturnType());
+            const AttributeSet &Attrs =
+              CalleeFn->getAttributes().getRetAttributes();
+            if (IntTy && IntTy->getBitWidth() <= 32)
+              return Attrs.hasAttribute(SignExt ? Attribute::SExt :
+                                                  Attribute::ZExt);
+          }
+        }
+      }
+    }
+
+    // If this is a copy from another register, we recursively check source.
+    if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+      return false;
+    const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
+    if (SrcMI != NULL)
+      return isSignOrZeroExtended(*SrcMI, SignExt, Depth);
+
+    return false;
+  }
+
+  case PPC::ANDIo:
+  case PPC::ANDISo:
+  case PPC::ORI:
+  case PPC::ORIS:
+  case PPC::XORI:
+  case PPC::XORIS:
+  case PPC::ANDIo8:
+  case PPC::ANDISo8:
+  case PPC::ORI8:
+  case PPC::ORIS8:
+  case PPC::XORI8:
+  case PPC::XORIS8: {
+    // logical operation with 16-bit immediate does not change the upper bits.
+    // So, we track the operand register as we do for register copy.
+    unsigned SrcReg = MI.getOperand(1).getReg();
+    if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+      return false;
+    const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
+    if (SrcMI != NULL)
+      return isSignOrZeroExtended(*SrcMI, SignExt, Depth);
+
+    return false;
+  }
+
+  // If all incoming values are sign-/zero-extended,
+  // the output of AND, OR, ISEL or PHI is also sign-/zero-extended.
+  case PPC::AND:
+  case PPC::AND8:
+  case PPC::OR:
+  case PPC::OR8:
+  case PPC::ISEL:
+  case PPC::PHI: {
+    if (Depth >= MAX_DEPTH)
+      return false;
+
+    // The input registers for PHI are operand 1, 3, ...
+    // The input registers for others are operand 1 and 2.
+    unsigned E = 3, D = 1;
+    if (MI.getOpcode() == PPC::PHI) {
+      E = MI.getNumOperands();
+      D = 2;
+    }
+
+    for (unsigned I = 1; I != E; I += D) {
+      if (MI.getOperand(I).isReg()) {
+        unsigned SrcReg = MI.getOperand(I).getReg();
+        if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+          return false;
+        const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
+        if (SrcMI == NULL || !isSignOrZeroExtended(*SrcMI, SignExt, Depth+1))
+          return false;
+      }
+      else
+        return false;
+    }
+    return true;
+  }
+
+  default:
+    return SignExt?isSignExtendingOp(MI):
+                   isZeroExtendingOp(MI);
+  }
+  return false;
+}