EXTRACT_SUBREG coalescing support. The coalescer now treats EXTRACT_SUBREG like
(almost) a register copy. However, it always coalesced to the register of the
RHS (the super-register). All uses of the result of a EXTRACT_SUBREG are sub-
register uses which adds subtle complications to load folding, spiller rewrite,
etc.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@42899 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index f205d0c..b2f7d7f 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -383,6 +383,26 @@
 }
 
 
+/// MergeValueInAsValue - Merge all of the live ranges of a specific val#
+/// in RHS into this live interval as the specified value number.
+/// The LiveRanges in RHS are allowed to overlap with LiveRanges in the
+/// current interval, but only if the overlapping LiveRanges have the
+/// specified value number.
+void LiveInterval::MergeValueInAsValue(const LiveInterval &RHS,
+                                       VNInfo *RHSValNo, VNInfo *LHSValNo) {
+  // TODO: Make this more efficient.
+  iterator InsertPos = begin();
+  for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
+    if (I->valno != RHSValNo)
+      continue;
+    // Map the valno in the other live range to the current live range.
+    LiveRange Tmp = *I;
+    Tmp.valno = LHSValNo;
+    InsertPos = addRangeFrom(Tmp, InsertPos);
+  }
+}
+
+
 /// MergeInClobberRanges - For any live ranges that are not defined in the
 /// current interval, but are defined in the Clobbers interval, mark them
 /// used with an unknown definition value.
@@ -485,6 +505,23 @@
   }
 }
 
+void LiveInterval::Copy(const LiveInterval &RHS,
+                        BumpPtrAllocator &VNInfoAllocator) {
+  ranges.clear();
+  valnos.clear();
+  preference = RHS.preference;
+  weight = RHS.weight;
+  for (unsigned i = 0, e = RHS.getNumValNums(); i != e; ++i) {
+    const VNInfo *VNI = RHS.getValNumInfo(i);
+    VNInfo *NewVNI = getNextValue(~0U, 0, VNInfoAllocator);
+    copyValNumInfo(NewVNI, VNI);
+  }
+  for (unsigned i = 0, e = RHS.ranges.size(); i != e; ++i) {
+    const LiveRange &LR = RHS.ranges[i];
+    addRange(LiveRange(LR.start, LR.end, getValNumInfo(LR.valno->id)));
+  }
+}
+
 unsigned LiveInterval::getSize() const {
   unsigned Sum = 0;
   for (const_iterator I = begin(), E = end(); I != E; ++I)
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 5ada75c..5ed24a3 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -136,75 +136,6 @@
   }
 }
 
-// Not called?
-/// CreateNewLiveInterval - Create a new live interval with the given live
-/// ranges. The new live interval will have an infinite spill weight.
-LiveInterval&
-LiveIntervals::CreateNewLiveInterval(const LiveInterval *LI,
-                                     const std::vector<LiveRange> &LRs) {
-  const TargetRegisterClass *RC = mf_->getSSARegMap()->getRegClass(LI->reg);
-
-  // Create a new virtual register for the spill interval.
-  unsigned NewVReg = mf_->getSSARegMap()->createVirtualRegister(RC);
-
-  // Replace the old virtual registers in the machine operands with the shiny
-  // new one.
-  for (std::vector<LiveRange>::const_iterator
-         I = LRs.begin(), E = LRs.end(); I != E; ++I) {
-    unsigned Index = getBaseIndex(I->start);
-    unsigned End = getBaseIndex(I->end - 1) + InstrSlots::NUM;
-
-    for (; Index != End; Index += InstrSlots::NUM) {
-      // Skip deleted instructions
-      while (Index != End && !getInstructionFromIndex(Index))
-        Index += InstrSlots::NUM;
-
-      if (Index == End) break;
-
-      MachineInstr *MI = getInstructionFromIndex(Index);
-
-      for (unsigned J = 0, e = MI->getNumOperands(); J != e; ++J) {
-        MachineOperand &MOp = MI->getOperand(J);
-        if (MOp.isRegister() && MOp.getReg() == LI->reg)
-          MOp.setReg(NewVReg);
-      }
-    }
-  }
-
-  LiveInterval &NewLI = getOrCreateInterval(NewVReg);
-
-  // The spill weight is now infinity as it cannot be spilled again
-  NewLI.weight = float(HUGE_VAL);
-
-  for (std::vector<LiveRange>::const_iterator
-         I = LRs.begin(), E = LRs.end(); I != E; ++I) {
-    DOUT << "  Adding live range " << *I << " to new interval\n";
-    NewLI.addRange(*I);
-  }
-            
-  DOUT << "Created new live interval " << NewLI << "\n";
-  return NewLI;
-}
-
-/// isReDefinedByTwoAddr - Returns true if the Reg re-definition is due to
-/// two addr elimination.
-static bool isReDefinedByTwoAddr(MachineInstr *MI, unsigned Reg,
-                                const TargetInstrInfo *TII) {
-  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-    MachineOperand &MO1 = MI->getOperand(i);
-    if (MO1.isRegister() && MO1.isDef() && MO1.getReg() == Reg) {
-      for (unsigned j = i+1; j < e; ++j) {
-        MachineOperand &MO2 = MI->getOperand(j);
-        if (MO2.isRegister() && MO2.isUse() && MO2.getReg() == Reg &&
-            MI->getInstrDescriptor()->
-            getOperandConstraint(j, TOI::TIED_TO) == (int)i)
-          return true;
-      }
-    }
-  }
-  return false;
-}
-
 /// isReMaterializable - Returns true if the definition MI of the specified
 /// val# of the specified interval is re-materializable.
 bool LiveIntervals::isReMaterializable(const LiveInterval &li,
@@ -232,7 +163,7 @@
       continue; // Dead val#.
     MachineInstr *DefMI = (DefIdx == ~0u)
       ? NULL : getInstructionFromIndex(DefIdx);
-    if (DefMI && isReDefinedByTwoAddr(DefMI, li.reg, tii_))
+    if (DefMI && DefMI->isRegReDefinedByTwoAddr(li.reg))
       return false;
   }
   return true;
@@ -243,9 +174,9 @@
 /// MI. If it is successul, MI is updated with the newly created MI and
 /// returns true.
 bool LiveIntervals::tryFoldMemoryOperand(MachineInstr* &MI, VirtRegMap &vrm,
+                                         MachineInstr *DefMI,
                                          unsigned index, unsigned i,
-                                         bool isSS, MachineInstr *DefMI,
-                                         int slot, unsigned reg) {
+                                         bool isSS, int slot, unsigned reg) {
   MachineInstr *fmi = isSS
     ? mri_->foldMemoryOperand(MI, i, slot)
     : mri_->foldMemoryOperand(MI, i, DefMI);
@@ -281,7 +212,8 @@
   li.print(DOUT, mri_);
   DOUT << '\n';
 
-  const TargetRegisterClass* rc = mf_->getSSARegMap()->getRegClass(li.reg);
+  SSARegMap *RegMap = mf_->getSSARegMap();
+  const TargetRegisterClass* rc = RegMap->getRegClass(li.reg);
 
   unsigned NumValNums = li.getNumValNums();
   SmallVector<MachineInstr*, 4> ReMatDefs;
@@ -364,113 +296,126 @@
     RestartInstruction:
       for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
         MachineOperand& mop = MI->getOperand(i);
-        if (mop.isRegister() && mop.getReg() == li.reg) {
-          if (DefIsReMat) {
-            // If this is the rematerializable definition MI itself and
-            // all of its uses are rematerialized, simply delete it.
-            if (MI == OrigDefMI) {
-              if (CanDelete) {
-                RemoveMachineInstrFromMaps(MI);
-                MI->eraseFromParent();
-                break;
-              } else if (tryFoldMemoryOperand(MI, vrm, index, i, true,
-                                              DefMI, slot, li.reg)) {
-                // Folding the load/store can completely change the instruction
-                // in unpredictable ways, rescan it from the beginning.
-                goto RestartInstruction;
-              }
-            } else if (isLoad &&
-                       tryFoldMemoryOperand(MI, vrm, index, i, isLoadSS,
-                                            DefMI, LdSlot, li.reg))
-                // Folding the load/store can completely change the
-                // instruction in unpredictable ways, rescan it from
-                // the beginning.
-                goto RestartInstruction;
-          } else {
-            if (tryFoldMemoryOperand(MI,  vrm, index, i, true, DefMI,
-                                     slot, li.reg))
-              // Folding the load/store can completely change the instruction in
-              // unpredictable ways, rescan it from the beginning.
-              goto RestartInstruction;
+        if (!mop.isRegister())
+          continue;
+        unsigned Reg = mop.getReg();
+        if (Reg == 0 || MRegisterInfo::isPhysicalRegister(Reg))
+          continue;
+        bool isSubReg = RegMap->isSubRegister(Reg);
+        unsigned SubIdx = 0;
+        if (isSubReg) {
+          SubIdx = RegMap->getSubRegisterIndex(Reg);
+          Reg = RegMap->getSuperRegister(Reg);
+        }
+        if (Reg != li.reg)
+          continue;
+
+        bool TryFold = !DefIsReMat;
+        bool FoldSS = true;
+        int FoldSlot = slot;
+        if (DefIsReMat) {
+          // If this is the rematerializable definition MI itself and
+          // all of its uses are rematerialized, simply delete it.
+          if (MI == OrigDefMI && CanDelete) {
+            RemoveMachineInstrFromMaps(MI);
+            MI->eraseFromParent();
+            break;
           }
 
-          // Create a new virtual register for the spill interval.
-          unsigned NewVReg = mf_->getSSARegMap()->createVirtualRegister(rc);
-            
-          // Scan all of the operands of this instruction rewriting operands
-          // to use NewVReg instead of li.reg as appropriate.  We do this for
-          // two reasons:
-          //
-          //   1. If the instr reads the same spilled vreg multiple times, we
-          //      want to reuse the NewVReg.
-          //   2. If the instr is a two-addr instruction, we are required to
-          //      keep the src/dst regs pinned.
-          //
-          // Keep track of whether we replace a use and/or def so that we can
-          // create the spill interval with the appropriate range. 
-          mop.setReg(NewVReg);
-            
-          bool HasUse = mop.isUse();
-          bool HasDef = mop.isDef();
-          for (unsigned j = i+1, e = MI->getNumOperands(); j != e; ++j) {
-            if (MI->getOperand(j).isRegister() &&
-                MI->getOperand(j).getReg() == li.reg) {
-              MI->getOperand(j).setReg(NewVReg);
-              HasUse |= MI->getOperand(j).isUse();
-              HasDef |= MI->getOperand(j).isDef();
-            }
+          // If def for this use can't be rematerialized, then try folding.
+          TryFold = !OrigDefMI || (OrigDefMI && (MI == OrigDefMI || isLoad));
+          if (isLoad) {
+            // Try fold loads (from stack slot, constant pool, etc.) into uses.
+            FoldSS = isLoadSS;
+            FoldSlot = LdSlot;
           }
+        }
 
-          vrm.grow();
-          if (DefIsReMat) {
-            vrm.setVirtIsReMaterialized(NewVReg, DefMI/*, CanDelete*/);
-            if (ReMatIds[I->valno->id] == VirtRegMap::MAX_STACK_SLOT) {
-              // Each valnum may have its own remat id.
-              ReMatIds[I->valno->id] = vrm.assignVirtReMatId(NewVReg);
-            } else {
-              vrm.assignVirtReMatId(NewVReg, ReMatIds[I->valno->id]);
-            }
-            if (!CanDelete || (HasUse && HasDef)) {
-              // If this is a two-addr instruction then its use operands are
-              // rematerializable but its def is not. It should be assigned a
-              // stack slot.
-              vrm.assignVirt2StackSlot(NewVReg, slot);
-            }
+        // FIXME: fold subreg use
+        if (!isSubReg && TryFold &&
+            tryFoldMemoryOperand(MI, vrm, DefMI, index, i, FoldSS, FoldSlot, Reg))
+          // Folding the load/store can completely change the instruction in
+          // unpredictable ways, rescan it from the beginning.
+          goto RestartInstruction;
+
+        // Create a new virtual register for the spill interval.
+        unsigned NewVReg = RegMap->createVirtualRegister(rc);
+        if (isSubReg)
+          RegMap->setIsSubRegister(NewVReg, NewVReg, SubIdx);
+            
+        // Scan all of the operands of this instruction rewriting operands
+        // to use NewVReg instead of li.reg as appropriate.  We do this for
+        // two reasons:
+        //
+        //   1. If the instr reads the same spilled vreg multiple times, we
+        //      want to reuse the NewVReg.
+        //   2. If the instr is a two-addr instruction, we are required to
+        //      keep the src/dst regs pinned.
+        //
+        // Keep track of whether we replace a use and/or def so that we can
+        // create the spill interval with the appropriate range. 
+        mop.setReg(NewVReg);
+            
+        bool HasUse = mop.isUse();
+        bool HasDef = mop.isDef();
+        for (unsigned j = i+1, e = MI->getNumOperands(); j != e; ++j) {
+          if (MI->getOperand(j).isRegister() &&
+              MI->getOperand(j).getReg() == li.reg) {
+            MI->getOperand(j).setReg(NewVReg);
+            HasUse |= MI->getOperand(j).isUse();
+            HasDef |= MI->getOperand(j).isDef();
+          }
+        }
+
+        vrm.grow();
+        if (DefIsReMat) {
+          vrm.setVirtIsReMaterialized(NewVReg, DefMI/*, CanDelete*/);
+          if (ReMatIds[I->valno->id] == VirtRegMap::MAX_STACK_SLOT) {
+            // Each valnum may have its own remat id.
+            ReMatIds[I->valno->id] = vrm.assignVirtReMatId(NewVReg);
           } else {
+            vrm.assignVirtReMatId(NewVReg, ReMatIds[I->valno->id]);
+          }
+          if (!CanDelete || (HasUse && HasDef)) {
+            // If this is a two-addr instruction then its use operands are
+            // rematerializable but its def is not. It should be assigned a
+            // stack slot.
             vrm.assignVirt2StackSlot(NewVReg, slot);
           }
-
-          // create a new register interval for this spill / remat.
-          LiveInterval &nI = getOrCreateInterval(NewVReg);
-          assert(nI.empty());
-
-          // the spill weight is now infinity as it
-          // cannot be spilled again
-          nI.weight = HUGE_VALF;
-
-          if (HasUse) {
-            LiveRange LR(getLoadIndex(index), getUseIndex(index)+1,
-                         nI.getNextValue(~0U, 0, VNInfoAllocator));
-            DOUT << " +" << LR;
-            nI.addRange(LR);
-          }
-          if (HasDef) {
-            LiveRange LR(getDefIndex(index), getStoreIndex(index),
-                         nI.getNextValue(~0U, 0, VNInfoAllocator));
-            DOUT << " +" << LR;
-            nI.addRange(LR);
-          }
-            
-          added.push_back(&nI);
-
-          // update live variables if it is available
-          if (lv_)
-            lv_->addVirtualRegisterKilled(NewVReg, MI);
-            
-          DOUT << "\t\t\t\tadded new interval: ";
-          nI.print(DOUT, mri_);
-          DOUT << '\n';
+        } else {
+          vrm.assignVirt2StackSlot(NewVReg, slot);
         }
+
+        // create a new register interval for this spill / remat.
+        LiveInterval &nI = getOrCreateInterval(NewVReg);
+        assert(nI.empty());
+
+        // the spill weight is now infinity as it
+        // cannot be spilled again
+        nI.weight = HUGE_VALF;
+
+        if (HasUse) {
+          LiveRange LR(getLoadIndex(index), getUseIndex(index)+1,
+                       nI.getNextValue(~0U, 0, VNInfoAllocator));
+          DOUT << " +" << LR;
+          nI.addRange(LR);
+        }
+        if (HasDef) {
+          LiveRange LR(getDefIndex(index), getStoreIndex(index),
+                       nI.getNextValue(~0U, 0, VNInfoAllocator));
+          DOUT << " +" << LR;
+          nI.addRange(LR);
+        }
+            
+        added.push_back(&nI);
+
+        // update live variables if it is available
+        if (lv_)
+          lv_->addVirtualRegisterKilled(NewVReg, MI);
+            
+        DOUT << "\t\t\t\tadded new interval: ";
+        nI.print(DOUT, mri_);
+        DOUT << '\n';
       }
     }
   }
@@ -501,10 +446,14 @@
     unsigned defIndex = getDefIndex(MIIdx);
     VNInfo *ValNo;
     unsigned SrcReg, DstReg;
-    if (!tii_->isMoveInstr(*mi, SrcReg, DstReg))
-      ValNo = interval.getNextValue(defIndex, 0, VNInfoAllocator);
-    else
+    if (tii_->isMoveInstr(*mi, SrcReg, DstReg))
       ValNo = interval.getNextValue(defIndex, SrcReg, VNInfoAllocator);
+    else if (mi->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG ||
+             mi->getOpcode() == TargetInstrInfo::INSERT_SUBREG)
+      ValNo = interval.getNextValue(defIndex, mi->getOperand(1).getReg(),
+                                    VNInfoAllocator);
+    else
+      ValNo = interval.getNextValue(defIndex, 0, VNInfoAllocator);
 
     assert(ValNo->id == 0 && "First value in interval is not 0?");
 
@@ -576,7 +525,7 @@
     // must be due to phi elimination or two addr elimination.  If this is
     // the result of two address elimination, then the vreg is one of the
     // def-and-use register operand.
-    if (isReDefinedByTwoAddr(mi, interval.reg, tii_)) {
+    if (mi->isRegReDefinedByTwoAddr(interval.reg)) {
       // If this is a two-address definition, then we have already processed
       // the live range.  The only problem is that we didn't realize there
       // are actually two values in the live interval.  Because of this we
@@ -656,10 +605,13 @@
       
       VNInfo *ValNo;
       unsigned SrcReg, DstReg;
-      if (!tii_->isMoveInstr(*mi, SrcReg, DstReg))
-        ValNo = interval.getNextValue(defIndex, 0, VNInfoAllocator);
-      else
+      if (tii_->isMoveInstr(*mi, SrcReg, DstReg))
         ValNo = interval.getNextValue(defIndex, SrcReg, VNInfoAllocator);
+      else if (mi->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG)
+        ValNo = interval.getNextValue(defIndex, mi->getOperand(1).getReg(),
+                                      VNInfoAllocator);
+      else
+        ValNo = interval.getNextValue(defIndex, 0, VNInfoAllocator);
       
       unsigned killIndex = getInstructionIndex(&mbb->back()) + InstrSlots::NUM;
       LiveRange LR(defIndex, killIndex, ValNo);
@@ -741,7 +693,9 @@
     handleVirtualRegisterDef(MBB, MI, MIIdx, getOrCreateInterval(reg));
   else if (allocatableRegs_[reg]) {
     unsigned SrcReg, DstReg;
-    if (!tii_->isMoveInstr(*MI, SrcReg, DstReg))
+    if (MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG)
+      SrcReg = MI->getOperand(1).getReg();
+    else if (!tii_->isMoveInstr(*MI, SrcReg, DstReg))
       SrcReg = 0;
     handlePhysicalRegisterDef(MBB, MI, MIIdx, getOrCreateInterval(reg), SrcReg);
     // Def of a register also defines its sub-registers.
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 1634c78..1ac955a 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -220,6 +220,24 @@
   return -1;
 }
   
+/// isRegReDefinedByTwoAddr - Returns true if the Reg re-definition is due
+/// to two addr elimination.
+bool MachineInstr::isRegReDefinedByTwoAddr(unsigned Reg) const {
+  const TargetInstrDescriptor *TID = getInstrDescriptor();
+  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO1 = getOperand(i);
+    if (MO1.isRegister() && MO1.isDef() && MO1.getReg() == Reg) {
+      for (unsigned j = i+1; j < e; ++j) {
+        const MachineOperand &MO2 = getOperand(j);
+        if (MO2.isRegister() && MO2.isUse() && MO2.getReg() == Reg &&
+            TID->getOperandConstraint(j, TOI::TIED_TO) == (int)i)
+          return true;
+      }
+    }
+  }
+  return false;
+}
+
 /// copyKillDeadInfo - Copies kill / dead operand properties from MI.
 ///
 void MachineInstr::copyKillDeadInfo(const MachineInstr *MI) {
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp
index b616b7e..bb5379c 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp
@@ -567,9 +567,6 @@
     // TODO: If the node is a use of a CopyFromReg from a physical register
     // fold the extract into the copy now
 
-    // TODO: Add tracking info to SSARegMap of which vregs are subregs
-    // to allow coalescing in the allocator
-    
     // Create the extract_subreg machine instruction.
     MachineInstr *MI =
       new MachineInstr(BB, TII->get(TargetInstrInfo::EXTRACT_SUBREG));
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index c1a8509..2baf24d 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -1097,6 +1097,11 @@
         // CopyToReg should be close to its uses to facilitate coalescing and
         // avoid spilling.
         return 0;
+      else if (Opc == TargetInstrInfo::EXTRACT_SUBREG ||
+               Opc == TargetInstrInfo::INSERT_SUBREG)
+        // EXTRACT_SUBREG / INSERT_SUBREG should be close to its use to
+        // facilitate coalescing.
+        return 0;
       else if (SU->NumSuccs == 0)
         // If SU does not have a use, i.e. it doesn't produce a value that would
         // be consumed (e.g. store), then it terminates a chain of computation.
@@ -1308,6 +1313,14 @@
           // Be conservative. Ignore if nodes aren't at the same depth.
           if (SuccSU->Depth != SU->Depth)
             continue;
+          if (!SuccSU->Node || !SuccSU->Node->isTargetOpcode())
+            continue;
+          // Don't constraint extract_subreg / insert_subreg these may be
+          // coalesced away. We don't them close to their uses.
+          unsigned SuccOpc = SuccSU->Node->getTargetOpcode();
+          if (SuccOpc == TargetInstrInfo::EXTRACT_SUBREG ||
+              SuccOpc == TargetInstrInfo::INSERT_SUBREG)
+            continue;
           if ((!canClobber(SuccSU, DUSU) ||
                (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
                (!SU->isCommutable && SuccSU->isCommutable)) &&
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp
index 59e9e45..9d43750 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.cpp
+++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp
@@ -226,11 +226,55 @@
     DOUT << "\tDst reg is unallocatable physreg.\n";
     return true;  // Not coalescable.
   }
-  
-  // If they are not of the same register class, we cannot join them.
-  if (differingRegisterClasses(repSrcReg, repDstReg)) {
+
+  bool isExtSubReg = CopyMI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG;
+  unsigned RealDstReg = 0;
+  if (isExtSubReg) {
+    unsigned SubIdx = CopyMI->getOperand(2).getImm();
+    if (SrcIsPhys)
+      // r1024 = EXTRACT_SUBREG EAX, 0 then r1024 is really going to be
+      // coalesced with AX.
+      repSrcReg = mri_->getSubReg(repSrcReg, SubIdx);
+    else if (DstIsPhys) {
+      // If this is a extract_subreg where dst is a physical register, e.g.
+      // cl = EXTRACT_SUBREG reg1024, 1
+      // then create and update the actual physical register allocated to RHS.
+      const TargetRegisterClass *RC = mf_->getSSARegMap()->getRegClass(SrcReg);
+      for (const unsigned *SRs = mri_->getSuperRegisters(repDstReg);
+           unsigned SR = *SRs; ++SRs) {
+        if (repDstReg == mri_->getSubReg(SR, SubIdx) &&
+            RC->contains(SR)) {
+          RealDstReg = SR;
+          break;
+        }
+      }
+      assert(RealDstReg && "Invalid extra_subreg instruction!");
+
+      // For this type of EXTRACT_SUBREG, conservatively
+      // check if the live interval of the source register interfere with the
+      // actual super physical register we are trying to coalesce with.
+      LiveInterval &RHS = li_->getInterval(repSrcReg);
+      if (li_->hasInterval(RealDstReg) &&
+          RHS.overlaps(li_->getInterval(RealDstReg))) {
+        DOUT << "Interfere with register ";
+        DEBUG(li_->getInterval(RealDstReg).print(DOUT, mri_));
+        return true; // Not coalescable
+      }
+      for (const unsigned* SR = mri_->getSubRegisters(RealDstReg); *SR; ++SR)
+        if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
+          DOUT << "Interfere with sub-register ";
+          DEBUG(li_->getInterval(*SR).print(DOUT, mri_));
+          return true;
+        }
+    }
+  } else if (differingRegisterClasses(repSrcReg, repDstReg)) {
+    // If they are not of the same register class, we cannot join them.
     DOUT << "\tSrc/Dest are different register classes.\n";
-    return true;  // Not coalescable.
+    // Allow the coalescer to try again in case either side gets coalesced to
+    // a physical register that's compatible with the other side. e.g.
+    // r1024 = MOV32to32_ r1025
+    // but later r1024 is assigned EAX then r1025 may be coalesced with EAX.
+    return false;
   }
   
   LiveInterval &SrcInt = li_->getInterval(repSrcReg);
@@ -286,14 +330,14 @@
   // virtual register. Once the coalescing is done, it cannot be broken and
   // these are not spillable! If the destination interval uses are far away,
   // think twice about coalescing them!
-  if (!mopd->isDead() && (SrcIsPhys || DstIsPhys)) {
+  if (!mopd->isDead() && (SrcIsPhys || DstIsPhys) && !isExtSubReg) {
     LiveInterval &JoinVInt = SrcIsPhys ? DstInt : SrcInt;
     unsigned JoinVReg = SrcIsPhys ? repDstReg : repSrcReg;
     unsigned JoinPReg = SrcIsPhys ? repSrcReg : repDstReg;
     const TargetRegisterClass *RC = mf_->getSSARegMap()->getRegClass(JoinVReg);
     unsigned Threshold = allocatableRCRegs_[RC].count();
 
-    // If the virtual register live interval is long has it has low use desity,
+    // If the virtual register live interval is long but it has low use desity,
     // do not join them, instead mark the physical register as its allocation
     // preference.
     unsigned Length = JoinVInt.getSize() / InstrSlots::NUM;
@@ -340,7 +384,7 @@
     // Coalescing failed.
     
     // If we can eliminate the copy without merging the live ranges, do so now.
-    if (AdjustCopiesBackFrom(SrcInt, DstInt, CopyMI))
+    if (!isExtSubReg && AdjustCopiesBackFrom(SrcInt, DstInt, CopyMI))
       return true;
 
     // Otherwise, we are unable to join the intervals.
@@ -368,9 +412,24 @@
         unsetRegisterKills(I->start, I->end, repDstReg);
     }
 
+    // If this is a extract_subreg where dst is a physical register, e.g.
+    // cl = EXTRACT_SUBREG reg1024, 1
+    // then create and update the actual physical register allocated to RHS.
+    if (RealDstReg) {
+      unsigned CopyIdx = li_->getInstructionIndex(CopyMI);
+      VNInfo *DstValNo =
+        ResDstInt->getLiveRangeContaining(li_->getUseIndex(CopyIdx))->valno;
+      LiveInterval &RealDstInt = li_->getOrCreateInterval(RealDstReg);
+      VNInfo *ValNo = RealDstInt.getNextValue(DstValNo->def, DstValNo->reg,
+                                              li_->getVNInfoAllocator());
+      RealDstInt.addKills(ValNo, DstValNo->kills);
+      RealDstInt.MergeValueInAsValue(*ResDstInt, DstValNo, ValNo);
+      repDstReg = RealDstReg;
+    }
+
     // Update the liveintervals of sub-registers.
     for (const unsigned *AS = mri_->getSubRegisters(repDstReg); *AS; ++AS)
-      li_->getInterval(*AS).MergeInClobberRanges(*ResSrcInt,
+      li_->getOrCreateInterval(*AS).MergeInClobberRanges(*ResSrcInt,
                                                  li_->getVNInfoAllocator());
   } else {
     // Merge use info if the destination is a virtual register.
@@ -379,14 +438,25 @@
     dVI.NumUses += sVI.NumUses;
   }
 
-  DOUT << "\n\t\tJoined.  Result = "; ResDstInt->print(DOUT, mri_);
-  DOUT << "\n";
-
   // Remember these liveintervals have been joined.
   JoinedLIs.set(repSrcReg - MRegisterInfo::FirstVirtualRegister);
   if (MRegisterInfo::isVirtualRegister(repDstReg))
     JoinedLIs.set(repDstReg - MRegisterInfo::FirstVirtualRegister);
 
+  if (isExtSubReg && !SrcIsPhys && !DstIsPhys) {
+    if (!Swapped) {
+      // Make sure we allocate the larger super-register.
+      ResSrcInt->Copy(*ResDstInt, li_->getVNInfoAllocator());
+      std::swap(repSrcReg, repDstReg);
+      std::swap(ResSrcInt, ResDstInt);
+    }
+    SubRegIdxes.push_back(std::make_pair(repSrcReg,
+                                         CopyMI->getOperand(2).getImm()));
+  }
+
+  DOUT << "\n\t\tJoined.  Result = "; ResDstInt->print(DOUT, mri_);
+  DOUT << "\n";
+
   // repSrcReg is guarateed to be the register whose live interval that is
   // being merged.
   li_->removeInterval(repSrcReg);
@@ -857,9 +927,13 @@
        MII != E;) {
     MachineInstr *Inst = MII++;
     
-    // If this isn't a copy, we can't join intervals.
+    // If this isn't a copy nor a extract_subreg, we can't join intervals.
     unsigned SrcReg, DstReg;
-    if (!tii_->isMoveInstr(*Inst, SrcReg, DstReg)) continue;
+    if (Inst->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) {
+      DstReg = Inst->getOperand(0).getReg();
+      SrcReg = Inst->getOperand(1).getReg();
+    } else if (!tii_->isMoveInstr(*Inst, SrcReg, DstReg))
+      continue;
     
     bool Done = JoinCopy(Inst, SrcReg, DstReg, PhysOnly);
     if (TryAgain && !Done)
@@ -950,7 +1024,7 @@
 /// Return true if the two specified registers belong to different register
 /// classes.  The registers may be either phys or virt regs.
 bool SimpleRegisterCoalescing::differingRegisterClasses(unsigned RegA,
-                                             unsigned RegB) const {
+                                                        unsigned RegB) const {
 
   // Get the register classes for the first reg.
   if (MRegisterInfo::isPhysicalRegister(RegA)) {
@@ -1074,6 +1148,7 @@
 void SimpleRegisterCoalescing::releaseMemory() {
    r2rMap_.clear();
    JoinedLIs.clear();
+   SubRegIdxes.clear();
 }
 
 static bool isZeroLengthInterval(LiveInterval *li) {
@@ -1101,7 +1176,8 @@
          E = mri_->regclass_end(); I != E; ++I)
     allocatableRCRegs_.insert(std::make_pair(*I,mri_->getAllocatableSet(fn, *I)));
 
-  r2rMap_.grow(mf_->getSSARegMap()->getLastVirtReg());
+  SSARegMap *RegMap = mf_->getSSARegMap();
+  r2rMap_.grow(RegMap->getLastVirtReg());
 
   // Join (coalesce) intervals if requested.
   if (EnableJoining) {
@@ -1111,6 +1187,13 @@
       I->second.print(DOUT, mri_);
       DOUT << "\n";
     }
+
+    // Track coalesced sub-registers.
+    while (!SubRegIdxes.empty()) {
+      std::pair<unsigned, unsigned> RI = SubRegIdxes.back();
+      SubRegIdxes.pop_back();
+      mf_->getSSARegMap()->setIsSubRegister(RI.first, rep(RI.first), RI.second);
+    }
   }
 
   // perform a final pass over the instructions and compute spill
@@ -1150,8 +1233,14 @@
           if (mop.isRegister() && mop.getReg() &&
               MRegisterInfo::isVirtualRegister(mop.getReg())) {
             // replace register with representative register
-            unsigned reg = rep(mop.getReg());
-            mii->getOperand(i).setReg(reg);
+            unsigned OrigReg = mop.getReg();
+            unsigned reg = rep(OrigReg);
+            // Don't rewrite if it is a sub-register of a virtual register.
+            if (!RegMap->isSubRegister(OrigReg))
+              mii->getOperand(i).setReg(reg);
+            else if (MRegisterInfo::isPhysicalRegister(reg))
+              mii->getOperand(i).setReg(mri_->getSubReg(reg,
+                                         RegMap->getSubRegisterIndex(OrigReg)));
 
             // Multiple uses of reg by the same instruction. It should not
             // contribute to spill weight again.
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index c6ac4b2..ebdbb86 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -242,10 +242,12 @@
   /// blocks that have low register pressure (the vreg may be spilled due to
   /// register pressure in other blocks).
   class VISIBILITY_HIDDEN LocalSpiller : public Spiller {
+    SSARegMap *RegMap;
     const MRegisterInfo *MRI;
     const TargetInstrInfo *TII;
   public:
     bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM) {
+      RegMap = MF.getSSARegMap();
       MRI = MF.getTarget().getRegisterInfo();
       TII = MF.getTarget().getInstrInfo();
       DOUT << "\n**** Local spiller rewriting function '"
@@ -776,25 +778,33 @@
       if (!MO.isRegister() || MO.getReg() == 0)
         continue;   // Ignore non-register operands.
       
-      if (MRegisterInfo::isPhysicalRegister(MO.getReg())) {
+      unsigned VirtReg = MO.getReg();
+      if (MRegisterInfo::isPhysicalRegister(VirtReg)) {
         // Ignore physregs for spilling, but remember that it is used by this
         // function.
-        MF.setPhysRegUsed(MO.getReg());
-        ReusedOperands.markClobbered(MO.getReg());
+        MF.setPhysRegUsed(VirtReg);
+        ReusedOperands.markClobbered(VirtReg);
         continue;
       }
       
-      assert(MRegisterInfo::isVirtualRegister(MO.getReg()) &&
+      assert(MRegisterInfo::isVirtualRegister(VirtReg) &&
              "Not a virtual or a physical register?");
       
-      unsigned VirtReg = MO.getReg();
+      unsigned SubIdx = 0;
+      bool isSubReg = RegMap->isSubRegister(VirtReg);
+      if (isSubReg) {
+        SubIdx = RegMap->getSubRegisterIndex(VirtReg);
+        VirtReg = RegMap->getSuperRegister(VirtReg);
+      }
+
       if (VRM.isAssignedReg(VirtReg)) {
         // This virtual register was assigned a physreg!
         unsigned Phys = VRM.getPhys(VirtReg);
         MF.setPhysRegUsed(Phys);
         if (MO.isDef())
           ReusedOperands.markClobbered(Phys);
-        MI.getOperand(i).setReg(Phys);
+        unsigned RReg = isSubReg ? MRI->getSubReg(Phys, SubIdx) : Phys;
+        MI.getOperand(i).setReg(RReg);
         continue;
       }
       
@@ -817,6 +827,24 @@
         if (ReuseSlot != VirtRegMap::NO_STACK_SLOT)
           PhysReg = Spills.getSpillSlotOrReMatPhysReg(ReuseSlot);
       }
+
+      // If this is a sub-register use, make sure the reuse register is in the
+      // right register class. For example, for x86 not all of the 32-bit
+      // registers have accessible sub-registers.
+      // Similarly so for EXTRACT_SUBREG. Consider this:
+      // EDI = op
+      // MOV32_mr fi#1, EDI
+      // ...
+      //       = EXTRACT_SUBREG fi#1
+      // fi#1 is available in EDI, but it cannot be reused because it's not in
+      // the right register file.
+      if (PhysReg &&
+          (isSubReg || MI.getOpcode() == TargetInstrInfo::EXTRACT_SUBREG)) {
+        const TargetRegisterClass* RC = RegMap->getRegClass(VirtReg);
+        if (!RC->contains(PhysReg))
+          PhysReg = 0;
+      }
+
       if (PhysReg) {
         // This spilled operand might be part of a two-address operand.  If this
         // is the case, then changing it will necessarily require changing the 
@@ -824,6 +852,7 @@
         // aren't allowed to modify the reused register.  If none of these cases
         // apply, reuse it.
         bool CanReuse = true;
+
         int ti = TID->getOperandConstraint(i, TOI::TIED_TO);
         if (ti != -1 &&
             MI.getOperand(ti).isRegister() && 
@@ -845,7 +874,8 @@
                << MRI->getName(PhysReg) << " for vreg"
                << VirtReg <<" instead of reloading into physreg "
                << MRI->getName(VRM.getPhys(VirtReg)) << "\n";
-          MI.getOperand(i).setReg(PhysReg);
+          unsigned RReg = isSubReg ? MRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+          MI.getOperand(i).setReg(RReg);
 
           // The only technical detail we have is that we don't know that
           // PhysReg won't be clobbered by a reloaded stack slot that occurs
@@ -883,7 +913,7 @@
             }
           }
           continue;
-        }
+        }  // CanReuse
         
         // Otherwise we have a situation where we have a two-address instruction
         // whose mod/ref operand needs to be reloaded.  This reload is already
@@ -917,13 +947,14 @@
           DOUT << " from physreg " << MRI->getName(PhysReg) << " for vreg"
                << VirtReg
                << " instead of reloading into same physreg.\n";
-          MI.getOperand(i).setReg(PhysReg);
+          unsigned RReg = isSubReg ? MRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+          MI.getOperand(i).setReg(RReg);
           ReusedOperands.markClobbered(PhysReg);
           ++NumReused;
           continue;
         }
         
-        const TargetRegisterClass* RC = MF.getSSARegMap()->getRegClass(VirtReg);
+        const TargetRegisterClass* RC = RegMap->getRegClass(VirtReg);
         MF.setPhysRegUsed(DesignatedReg);
         ReusedOperands.markClobbered(DesignatedReg);
         MRI->copyRegToReg(MBB, &MI, DesignatedReg, PhysReg, RC, RC);
@@ -935,16 +966,17 @@
         Spills.ClobberPhysReg(DesignatedReg);
         
         Spills.addAvailable(ReuseSlot, &MI, DesignatedReg);
-        MI.getOperand(i).setReg(DesignatedReg);
+        unsigned RReg =
+          isSubReg ? MRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg;
+        MI.getOperand(i).setReg(RReg);
         DOUT << '\t' << *prior(MII);
         ++NumReused;
         continue;
-      }
+      } // is (PhysReg)
       
       // Otherwise, reload it and remember that we have it.
       PhysReg = VRM.getPhys(VirtReg);
       assert(PhysReg && "Must map virtreg to physreg!");
-      const TargetRegisterClass* RC = MF.getSSARegMap()->getRegClass(VirtReg);
 
       // Note that, if we reused a register for a previous operand, the
       // register we want to reload into might not actually be
@@ -960,6 +992,7 @@
         MRI->reMaterialize(MBB, &MI, PhysReg, VRM.getReMaterializedMI(VirtReg));
         ++NumReMats;
       } else {
+        const TargetRegisterClass* RC = RegMap->getRegClass(VirtReg);
         MRI->loadRegFromStackSlot(MBB, &MI, PhysReg, SSorRMId, RC);
         ++NumLoads;
       }
@@ -974,7 +1007,8 @@
       // unless it's a two-address operand.
       if (TID->getOperandConstraint(i, TOI::TIED_TO) == -1)
         MI.getOperand(i).setIsKill();
-      MI.getOperand(i).setReg(PhysReg);
+      unsigned RReg = isSubReg ? MRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+      MI.getOperand(i).setReg(RReg);
       UpdateKills(*prior(MII), RegKills, KillOps);
       DOUT << '\t' << *prior(MII);
     }
@@ -1002,30 +1036,28 @@
       // straight load from the virt reg slot.
       if ((MR & VirtRegMap::isRef) && !(MR & VirtRegMap::isMod)) {
         int FrameIdx;
-        if (unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx)) {
-          if (FrameIdx == SS) {
-            // If this spill slot is available, turn it into a copy (or nothing)
-            // instead of leaving it as a load!
-            if (unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SS)) {
-              DOUT << "Promoted Load To Copy: " << MI;
-              if (DestReg != InReg) {
-                const TargetRegisterClass *RC =
-                  MF.getSSARegMap()->getRegClass(VirtReg);
-                MRI->copyRegToReg(MBB, &MI, DestReg, InReg, RC, RC);
-                // Revisit the copy so we make sure to notice the effects of the
-                // operation on the destreg (either needing to RA it if it's 
-                // virtual or needing to clobber any values if it's physical).
-                NextMII = &MI;
-                --NextMII;  // backtrack to the copy.
-                BackTracked = true;
-              } else
-                DOUT << "Removing now-noop copy: " << MI;
+        unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx);
+        if (DestReg && FrameIdx == SS) {
+          // If this spill slot is available, turn it into a copy (or nothing)
+          // instead of leaving it as a load!
+          if (unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SS)) {
+            DOUT << "Promoted Load To Copy: " << MI;
+            if (DestReg != InReg) {
+              const TargetRegisterClass *RC = RegMap->getRegClass(VirtReg);
+              MRI->copyRegToReg(MBB, &MI, DestReg, InReg, RC, RC);
+              // Revisit the copy so we make sure to notice the effects of the
+              // operation on the destreg (either needing to RA it if it's 
+              // virtual or needing to clobber any values if it's physical).
+              NextMII = &MI;
+              --NextMII;  // backtrack to the copy.
+              BackTracked = true;
+            } else
+              DOUT << "Removing now-noop copy: " << MI;
 
-              VRM.RemoveFromFoldedVirtMap(&MI);
-              MBB.erase(&MI);
-              Erased = true;
-              goto ProcessNextInst;
-            }
+            VRM.RemoveFromFoldedVirtMap(&MI);
+            MBB.erase(&MI);
+            Erased = true;
+            goto ProcessNextInst;
           }
         }
       }
@@ -1121,7 +1153,7 @@
 
         // The only vregs left are stack slot definitions.
         int StackSlot = VRM.getStackSlot(VirtReg);
-        const TargetRegisterClass *RC = MF.getSSARegMap()->getRegClass(VirtReg);
+        const TargetRegisterClass *RC = RegMap->getRegClass(VirtReg);
 
         // If this def is part of a two-address operand, make sure to execute
         // the store from the correct physical register.