Revert r345165 "[X86] Bring back the MOV64r0 pseudo instruction"

Google is reporting regressions on some benchmarks, so go back to materializing 64-bit zeros with MOV32r0 plus SUBREG_TO_REG.

llvm-svn: 345785
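
For context beyond the one-line reason above: with the MOV64r0 pseudo gone
again, a 64-bit zero goes back to being built from the 32-bit MOV32r0 pseudo
wrapped in SUBREG_TO_REG, relying on the x86-64 rule that any write to a
32-bit register also clears the upper 32 bits of the containing 64-bit
register. A standalone illustration of that hardware rule (not part of the
patch; assumes GCC/Clang inline-asm syntax on an x86-64 host):

  #include <cstdint>
  #include <cstdio>

  int main() {
    uint64_t Val = 0xdeadbeefcafebabeULL;
    // Load a 64-bit pattern into RAX, then zero only EAX with a 32-bit xor,
    // which is what the MOV32r0 pseudo ultimately expands to.
    asm volatile("movq %1, %%rax\n\t"
                 "xorl %%eax, %%eax\n\t" // 32-bit write clears bits 63:32 too
                 "movq %%rax, %0"
                 : "=r"(Val)
                 : "r"(Val)
                 : "rax");
    // Prints all zeros: the upper half was cleared by the 32-bit write.
    std::printf("rax = 0x%016llx\n", (unsigned long long)Val);
    return 0;
  }
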
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index b87f480..a49ad8b 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -1916,8 +1916,8 @@
     { &X86::GR64RegClass, X86::RAX, X86::RDX, {
         { X86::IDIV64r, X86::CQO,     Copy,            X86::RAX, S }, // SDiv
         { X86::IDIV64r, X86::CQO,     Copy,            X86::RDX, S }, // SRem
-        { X86::DIV64r,  X86::MOV64r0, Copy,            X86::RAX, U }, // UDiv
-        { X86::DIV64r,  X86::MOV64r0, Copy,            X86::RDX, U }, // URem
+        { X86::DIV64r,  X86::MOV32r0, Copy,            X86::RAX, U }, // UDiv
+        { X86::DIV64r,  X86::MOV32r0, Copy,            X86::RDX, U }, // URem
       }
     }, // i64
   };
@@ -1964,22 +1964,26 @@
       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
               TII.get(OpEntry.OpSignExtend));
     else {
-      unsigned ZeroReg = createResultReg(VT == MVT::i64 ? &X86::GR64RegClass
-                                                        : &X86::GR32RegClass);
+      unsigned Zero32 = createResultReg(&X86::GR32RegClass);
       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-              TII.get(OpEntry.OpSignExtend), ZeroReg);
+              TII.get(X86::MOV32r0), Zero32);
 
       // Copy the zero into the appropriate sub/super/identical physical
       // register. Unfortunately the operations needed are not uniform enough
       // to fit neatly into the table above.
-      if (VT == MVT::i16)
+      if (VT == MVT::i16) {
         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                 TII.get(Copy), TypeEntry.HighInReg)
-          .addReg(ZeroReg, 0, X86::sub_16bit);
-      else
+          .addReg(Zero32, 0, X86::sub_16bit);
+      } else if (VT == MVT::i32) {
         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                 TII.get(Copy), TypeEntry.HighInReg)
-            .addReg(ZeroReg);
+            .addReg(Zero32);
+      } else if (VT == MVT::i64) {
+        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+                TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
+            .addImm(0).addReg(Zero32).addImm(X86::sub_32bit);
+      }
     }
   }
   // Generate the DIV/IDIV instruction.
@@ -3704,9 +3708,6 @@
 
   uint64_t Imm = CI->getZExtValue();
   if (Imm == 0) {
-    if (VT.SimpleTy == MVT::i64)
-      return fastEmitInst_(X86::MOV64r0, &X86::GR64RegClass);
-
     unsigned SrcReg = fastEmitInst_(X86::MOV32r0, &X86::GR32RegClass);
     switch (VT.SimpleTy) {
     default: llvm_unreachable("Unexpected value type");
@@ -3719,6 +3720,13 @@
                                         X86::sub_16bit);
     case MVT::i32:
       return SrcReg;
+    case MVT::i64: {
+      unsigned ResultReg = createResultReg(&X86::GR64RegClass);
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
+        .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
+      return ResultReg;
+    }
     }
   }
 
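The two X86FastISel.cpp hunks above restore the same idea in two places
(div/rem zero-extension and i64 constant materialization). A condensed sketch
of the pattern, assuming the usual X86FastISel members FuncInfo, TII and
DbgLoc; the helper name materializeZero64 is hypothetical:

  // Materialize a 64-bit zero as a 32-bit xor (MOV32r0) plus SUBREG_TO_REG.
  unsigned X86FastISel::materializeZero64() {
    // Zero a 32-bit virtual register; MOV32r0 is later expanded to
    // "xor reg, reg".
    unsigned Zero32 = createResultReg(&X86::GR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV32r0),
            Zero32);
    // Widen to 64 bits. SUBREG_TO_REG asserts that the bits outside
    // sub_32bit are already zero, which the 32-bit write guarantees.
    unsigned Zero64 = createResultReg(&X86::GR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::SUBREG_TO_REG), Zero64)
        .addImm(0)
        .addReg(Zero32)
        .addImm(X86::sub_32bit);
    return Zero64;
  }
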
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 717ecc0..16819f4 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -3591,10 +3591,7 @@
           SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
       } else {
         // Zero out the high part, effectively zero extending the input.
-        unsigned ClrOpc = NVT.SimpleTy == MVT::i64 ? X86::MOV64r0
-                                                   : X86::MOV32r0;
-        MVT ClrVT = NVT.SimpleTy == MVT::i64 ? MVT::i64 : MVT::i32;
-        SDValue ClrNode = SDValue(CurDAG->getMachineNode(ClrOpc, dl, ClrVT), 0);
+        SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0);
         switch (NVT.SimpleTy) {
         case MVT::i16:
           ClrNode =
@@ -3605,7 +3602,15 @@
                       0);
           break;
         case MVT::i32:
+          break;
         case MVT::i64:
+          ClrNode =
+              SDValue(CurDAG->getMachineNode(
+                          TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
+                          CurDAG->getTargetConstant(0, dl, MVT::i64), ClrNode,
+                          CurDAG->getTargetConstant(X86::sub_32bit, dl,
+                                                    MVT::i32)),
+                      0);
           break;
         default:
           llvm_unreachable("Unexpected division source");
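
The X86ISelDAGToDAG.cpp hunk above is the SelectionDAG-level twin of the
FastISel change: the dividend's high half is zeroed with a single MOV32r0
node and only widened to i64 when the division is 64-bit. A sketch of just
that sequence, assuming CurDAG and dl as in X86DAGToDAGISel::Select:

  SDValue Zero32 =
      SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, MVT::i32), 0);
  // Wrap the 32-bit zero so the rest of the division lowering sees an i64.
  SDValue Zero64 = SDValue(
      CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
                             CurDAG->getTargetConstant(0, dl, MVT::i64),
                             Zero32,
                             CurDAG->getTargetConstant(X86::sub_32bit, dl,
                                                       MVT::i32)),
      0);
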
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index 2805517..71b43a3 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -270,18 +270,16 @@
 // Alias instruction mapping movr0 to xor.
 // FIXME: remove when we can teach regalloc that xor reg, reg is ok.
 let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
-    isPseudo = 1, AddedComplexity = 10 in {
+    isPseudo = 1, AddedComplexity = 10 in
 def MOV32r0  : I<0, Pseudo, (outs GR32:$dst), (ins), "",
                  [(set GR32:$dst, 0)]>, Sched<[WriteZero]>;
-def MOV64r0  : I<0, Pseudo, (outs GR64:$dst), (ins), "",
-                 [(set GR64:$dst, 0)]>, Sched<[WriteZero]>;
-}
 
 // Other widths can also make use of the 32-bit xor, which may have a smaller
 // encoding and avoid partial register updates.
 let AddedComplexity = 10 in {
 def : Pat<(i8 0), (EXTRACT_SUBREG (MOV32r0), sub_8bit)>;
 def : Pat<(i16 0), (EXTRACT_SUBREG (MOV32r0), sub_16bit)>;
+def : Pat<(i64 0), (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit)>;
 }
 
 let Predicates = [OptForSize, Not64BitMode],
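
With the MOV64r0 definition and its (set GR64:$dst, 0) pattern removed, the
restored Pat above routes i64 zeros through MOV32r0 just like the i8 and i16
cases. The observable end result is unchanged for straightforward code; for
example, a function returning a 64-bit zero is still normally lowered on
x86-64 to a 32-bit xor of the result register (a sketch; exact output depends
on compiler and flags):

  #include <cstdint>

  // With optimizations on, this typically compiles to
  //   xorl %eax, %eax   ; the 32-bit xor also clears the top half of %rax
  //   retq
  // which is what the (i64 0) -> (SUBREG_TO_REG ... (MOV32r0) ...) pattern
  // above expands to after register allocation.
  uint64_t zero64() { return 0; }
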
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 88f2f0f..ae45301 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -683,10 +683,8 @@
   if (ClobbersEFLAGS && !isSafeToClobberEFLAGS(MBB, I)) {
     // The instruction clobbers EFLAGS. Re-materialize as MOV32ri to avoid side
     // effects.
-    unsigned NewOpc = X86::MOV32ri;
     int Value;
     switch (Orig.getOpcode()) {
-    case X86::MOV64r0:  NewOpc = X86::MOV32ri64; Value = 0; break;
     case X86::MOV32r0:  Value = 0; break;
     case X86::MOV32r1:  Value = 1; break;
     case X86::MOV32r_1: Value = -1; break;
@@ -695,7 +693,7 @@
     }
 
     const DebugLoc &DL = Orig.getDebugLoc();
-    BuildMI(MBB, I, DL, get(NewOpc))
+    BuildMI(MBB, I, DL, get(X86::MOV32ri))
         .add(Orig.getOperand(0))
         .addImm(Value);
   } else {
@@ -3752,9 +3750,7 @@
       // MOV32r0 etc. are implemented with xor which clobbers condition code.
       // They are safe to move up, if the definition to EFLAGS is dead and
       // earlier instructions do not read or write EFLAGS.
-      if (!Movr0Inst &&
-          (Instr.getOpcode() == X86::MOV32r0 ||
-           Instr.getOpcode() == X86::MOV64r0) &&
+      if (!Movr0Inst && Instr.getOpcode() == X86::MOV32r0 &&
           Instr.registerDefIsDead(X86::EFLAGS, TRI)) {
         Movr0Inst = &Instr;
         continue;
@@ -4159,15 +4155,6 @@
   switch (MI.getOpcode()) {
   case X86::MOV32r0:
     return Expand2AddrUndef(MIB, get(X86::XOR32rr));
-  case X86::MOV64r0: {
-    const TargetRegisterInfo *TRI = &getRegisterInfo();
-    unsigned Reg = MIB->getOperand(0).getReg();
-    unsigned Reg32 = TRI->getSubReg(Reg, X86::sub_32bit);
-    MIB->getOperand(0).setReg(Reg32);
-    Expand2AddrUndef(MIB, get(X86::XOR32rr));
-    MIB.addReg(Reg, RegState::ImplicitDefine);
-    return true;
-  }
   case X86::MOV32r1:
     return expandMOV32r1(MIB, *this, /*MinusOne=*/ false);
   case X86::MOV32r_1:
@@ -4911,10 +4898,8 @@
     isTwoAddrFold = true;
   } else {
     if (OpNum == 0) {
-      if (MI.getOpcode() == X86::MOV32r0 || MI.getOpcode() == X86::MOV64r0) {
-        unsigned NewOpc = MI.getOpcode() == X86::MOV64r0 ? X86::MOV64mi32
-                                                         : X86::MOV32mi;
-        NewMI = MakeM0Inst(*this, NewOpc, MOs, InsertPt, MI);
+      if (MI.getOpcode() == X86::MOV32r0) {
+        NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, InsertPt, MI);
         if (NewMI)
           return NewMI;
       }
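
The X86InstrInfo.cpp hunks drop the MOV64r0 special cases from
rematerialization, EFLAGS-aware instruction motion, post-RA expansion and
memory-fold handling, leaving only the MOV32r0 paths. A compressed sketch of
the two remaining behaviors (not the real function bodies; MBB, I, DL and
DestReg stand in for the values the real code pulls from the original
instruction):

  // Rematerializing a zero: the pseudo normally becomes "xor reg, reg",
  // but xor clobbers EFLAGS, so fall back to "mov reg, 0" when the flags
  // are live at the insertion point.
  if (isSafeToClobberEFLAGS(MBB, I))
    BuildMI(MBB, I, DL, get(X86::MOV32r0), DestReg);
  else
    BuildMI(MBB, I, DL, get(X86::MOV32ri), DestReg).addImm(0);

  // Post-RA, any surviving MOV32r0 pseudo is expanded in place:
  //   MOV32r0 %reg   ->   XOR32rr %reg, undef %reg   (i.e. xorl reg, reg)
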
diff --git a/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp b/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
index 20997ec..14e4c45 100644
--- a/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
+++ b/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
@@ -487,14 +487,20 @@
     // Otherwise, just build the predicate state itself by zeroing a register
     // as we don't need any initial state.
     PS->InitialReg = MRI->createVirtualRegister(PS->RC);
-    auto ZeroI = BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV64r0),
-                         PS->InitialReg);
+    unsigned PredStateSubReg = MRI->createVirtualRegister(&X86::GR32RegClass);
+    auto ZeroI = BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV32r0),
+                         PredStateSubReg);
     ++NumInstsInserted;
     MachineOperand *ZeroEFLAGSDefOp =
         ZeroI->findRegisterDefOperand(X86::EFLAGS);
     assert(ZeroEFLAGSDefOp && ZeroEFLAGSDefOp->isImplicit() &&
            "Must have an implicit def of EFLAGS!");
     ZeroEFLAGSDefOp->setIsDead(true);
+    BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::SUBREG_TO_REG),
+            PS->InitialReg)
+        .addImm(0)
+        .addReg(PredStateSubReg)
+        .addImm(X86::sub_32bit);
   }
 
   // We're going to need to trace predicate state throughout the function's
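
The speculative-load-hardening change keeps the same shape: the 64-bit
predicate-state register is seeded from a MOV32r0 plus SUBREG_TO_REG instead
of a single MOV64r0. The one extra wrinkle, visible above, is that MOV32r0
carries an implicit EFLAGS def (it becomes an xor), and since nothing reads
the flags here the pass marks that def dead. A condensed sketch, assuming the
surrounding function's MRI, TII, Entry, EntryInsertPt, Loc and PS variables:

  // Zero a 32-bit temporary; mark the implicit EFLAGS def dead because the
  // flags produced by the eventual xor expansion are never read.
  unsigned PredStateSubReg = MRI->createVirtualRegister(&X86::GR32RegClass);
  auto ZeroI = BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV32r0),
                       PredStateSubReg);
  if (MachineOperand *FlagsDef = ZeroI->findRegisterDefOperand(X86::EFLAGS))
    FlagsDef->setIsDead(true);

  // Widen the 32-bit zero into the 64-bit predicate-state register.
  BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::SUBREG_TO_REG),
          PS->InitialReg)
      .addImm(0)
      .addReg(PredStateSubReg)
      .addImm(X86::sub_32bit);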