Follow up to 81494. When the folded reload is narrowed to a 32-bit load then change the destination register to a 32-bit one or add a sub-register index.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@81496 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 46e63b2..859ad57 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -2206,6 +2206,7 @@
       unsigned MinAlign = I->second.second;
       if (Align < MinAlign)
         return NULL;
+      bool NarrowToMOV32rm = false;
       if (Size) {
         unsigned RCSize =  MI->getDesc().OpInfo[i].getRegClass(&RI)->getSize();
         if (Size < RCSize) {
@@ -2216,7 +2217,10 @@
           // If this is a 64-bit load, but the spill slot is 32, then we can do
           // a 32-bit load which is implicitly zero-extended. This likely is due
           // to liveintervalanalysis remat'ing a load from stack slot.
+          if (MI->getOperand(0).getSubReg() || MI->getOperand(1).getSubReg())
+            return NULL;
           Opcode = X86::MOV32rm;
+          NarrowToMOV32rm = true;
         }
       }
 
@@ -2224,6 +2228,18 @@
         NewMI = FuseTwoAddrInst(MF, Opcode, MOs, MI, *this);
       else
         NewMI = FuseInst(MF, Opcode, i, MOs, MI, *this);
+
+      if (NarrowToMOV32rm) {
+        // If this is the special case where we use a MOV32rm to load a 32-bit
+        // value and zero-extend the top bits. Change the destination register
+        // to a 32-bit one.
+        unsigned DstReg = NewMI->getOperand(0).getReg();
+        if (TargetRegisterInfo::isPhysicalRegister(DstReg))
+          NewMI->getOperand(0).setReg(RI.getSubReg(DstReg,
+                                                   4/*x86_subreg_32bit*/));
+        else
+          NewMI->getOperand(0).setSubReg(4/*x86_subreg_32bit*/);
+      }
       return NewMI;
     }
   }