Mark x86's V_SET0 and V_SETALLONES with isSimpleLoad, and teach X86's
foldMemoryOperand how to "fold" them, by converting them into constant-pool
loads. When they aren't folded, they use xorps/pcmpeqd, but for example when
register pressure is high, they may now be folded as memory operands, which
reduces register pressure.

Also, mark V_SET0 isAsCheapAsAMove so that two-address-elimination will
remat it instead of copying zeros around (V_SETALLONES was already marked).


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@60461 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 4b2a20d..a43c5dd 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -19,6 +19,7 @@
 #include "X86Subtarget.h"
 #include "X86TargetMachine.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -2127,9 +2128,36 @@
     return NULL;
 
   SmallVector<MachineOperand,4> MOs;
-  unsigned NumOps = LoadMI->getDesc().getNumOperands();
-  for (unsigned i = NumOps - 4; i != NumOps; ++i)
-    MOs.push_back(LoadMI->getOperand(i));
+  if (LoadMI->getOpcode() == X86::V_SET0 ||
+      LoadMI->getOpcode() == X86::V_SETALLONES) {
+    // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
+    // Create a constant-pool entry and operands to load from it.
+
+    // x86-32 PIC requires a PIC base register for constant pools.
+    unsigned PICBase = 0;
+    if (TM.getRelocationModel() == Reloc::PIC_ &&
+        !TM.getSubtarget<X86Subtarget>().is64Bit())
+      PICBase = TM.getInstrInfo()->getGlobalBaseReg(&MF);
+
+    // Create a v4i32 constant-pool entry.
+    MachineConstantPool &MCP = *MF.getConstantPool();
+    const VectorType *Ty = VectorType::get(Type::Int32Ty, 4);
+    Constant *C = LoadMI->getOpcode() == X86::V_SET0 ?
+                    ConstantVector::getNullValue(Ty) :
+                    ConstantVector::getAllOnesValue(Ty);
+    unsigned CPI = MCP.getConstantPoolIndex(C, /*AlignmentLog2=*/4);
+
+    // Create operands to load from the constant pool entry.
+    MOs.push_back(MachineOperand::CreateReg(PICBase, false));
+    MOs.push_back(MachineOperand::CreateImm(1));
+    MOs.push_back(MachineOperand::CreateReg(0, false));
+    MOs.push_back(MachineOperand::CreateCPI(CPI, 0));
+  } else {
+    // Folding a normal load. Just copy the load's address operands.
+    unsigned NumOps = LoadMI->getDesc().getNumOperands();
+    for (unsigned i = NumOps - 4; i != NumOps; ++i)
+      MOs.push_back(LoadMI->getOperand(i));
+  }
   return foldMemoryOperand(MF, MI, Ops[0], MOs);
 }