The check for coalescing a virtual register to a physical register, e.g.
cl = EXTRACT_SUBREG reg1024, 1, is overly conservative. It should check
for overlaps of vr's live interval with the super registers of the
physical register (ECX in this case) and let JoinIntervals() handle checking
the coalescing feasibility against the physical register (cl in this case).


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@98251 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/include/llvm/CodeGen/LiveIntervalAnalysis.h b/include/llvm/CodeGen/LiveIntervalAnalysis.h
index e8856ac..1a2cc25 100644
--- a/include/llvm/CodeGen/LiveIntervalAnalysis.h
+++ b/include/llvm/CodeGen/LiveIntervalAnalysis.h
@@ -127,11 +127,11 @@
     bool conflictsWithPhysReg(const LiveInterval &li, VirtRegMap &vrm,
                               unsigned reg);
 
-    /// conflictsWithPhysRegRef - Similar to conflictsWithPhysRegRef except
-    /// it can check use as well.
-    bool conflictsWithPhysRegRef(LiveInterval &li, unsigned Reg,
-                                 bool CheckUse,
-                                 SmallPtrSet<MachineInstr*,32> &JoinedCopies);
+    /// conflictsWithSubPhysRegRef - Similar to conflictsWithPhysRegRef except
+    /// it checks for sub-register reference and it can check use as well.
+    bool conflictsWithSubPhysRegRef(LiveInterval &li, unsigned Reg,
+                                    bool CheckUse,
+                                   SmallPtrSet<MachineInstr*,32> &JoinedCopies);
 
     // Interval creation
     LiveInterval &getOrCreateInterval(unsigned reg) {
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index f8b1707..dbb5e19 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -218,9 +218,9 @@
   return false;
 }
 
-/// conflictsWithPhysRegRef - Similar to conflictsWithPhysRegRef except
-/// it can check use as well.
-bool LiveIntervals::conflictsWithPhysRegRef(LiveInterval &li,
+/// conflictsWithSubPhysRegRef - Similar to conflictsWithPhysRegRef except
+/// it checks for sub-register reference and it can check use as well.
+bool LiveIntervals::conflictsWithSubPhysRegRef(LiveInterval &li,
                                             unsigned Reg, bool CheckUse,
                                   SmallPtrSet<MachineInstr*,32> &JoinedCopies) {
   for (LiveInterval::Ranges::const_iterator
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp
index d25df1d..5c62118 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.cpp
+++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp
@@ -1260,10 +1260,10 @@
   RealDstReg = tri_->getMatchingSuperReg(DstReg, SubIdx, RC);
   assert(RealDstReg && "Invalid extract_subreg instruction!");
 
+  LiveInterval &RHS = li_->getInterval(SrcReg);
   // For this type of EXTRACT_SUBREG, conservatively
   // check if the live interval of the source register interfere with the
   // actual super physical register we are trying to coalesce with.
-  LiveInterval &RHS = li_->getInterval(SrcReg);
   if (li_->hasInterval(RealDstReg) &&
       RHS.overlaps(li_->getInterval(RealDstReg))) {
     DEBUG({
@@ -1273,7 +1273,11 @@
     return false; // Not coalescable
   }
   for (const unsigned* SR = tri_->getSubRegisters(RealDstReg); *SR; ++SR)
-    if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
+    // Do not check DstReg or its sub-register. JoinIntervals() will take care
+    // of that.
+    if (*SR != DstReg &&
+        !tri_->isSubRegister(DstReg, *SR) &&
+        li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
       DEBUG({
           dbgs() << "Interfere with sub-register ";
           li_->getInterval(*SR).print(dbgs(), tri_);
@@ -1294,9 +1298,9 @@
   RealSrcReg = tri_->getMatchingSuperReg(SrcReg, SubIdx, RC);
   assert(RealSrcReg && "Invalid extract_subreg instruction!");
 
-  LiveInterval &RHS = li_->getInterval(DstReg);
+  LiveInterval &LHS = li_->getInterval(DstReg);
   if (li_->hasInterval(RealSrcReg) &&
-      RHS.overlaps(li_->getInterval(RealSrcReg))) {
+      LHS.overlaps(li_->getInterval(RealSrcReg))) {
     DEBUG({
         dbgs() << "Interfere with register ";
         li_->getInterval(RealSrcReg).print(dbgs(), tri_);
@@ -1304,7 +1308,11 @@
     return false; // Not coalescable
   }
   for (const unsigned* SR = tri_->getSubRegisters(RealSrcReg); *SR; ++SR)
-    if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
+    // Do not check SrcReg or its sub-register. JoinIntervals() will take care
+    // of that.
+    if (*SR != SrcReg &&
+        !tri_->isSubRegister(SrcReg, *SR) &&
+        li_->hasInterval(*SR) && LHS.overlaps(li_->getInterval(*SR))) {
       DEBUG({
           dbgs() << "Interfere with sub-register ";
           li_->getInterval(*SR).print(dbgs(), tri_);
@@ -1476,6 +1484,9 @@
         return false; // Not coalescable.
       }
 
+      // FIXME: The following checks are somewhat conservative. Perhaps a better
+      // way to implement this is to treat this as coalescing a vr with the
+      // super physical register.
       if (isExtSubReg) {
         if (!CanJoinExtractSubRegToPhysReg(DstReg, SrcReg, SubIdx, RealDstReg))
           return false; // Not coalescable
@@ -2205,7 +2216,7 @@
         li_->intervalIsInOneMBB(RHS) &&
         li_->getApproximateInstructionCount(RHS) <= 10) {
       // Perform a more exhaustive check for some common cases.
-      if (li_->conflictsWithPhysRegRef(RHS, LHS.reg, true, JoinedCopies))
+      if (li_->conflictsWithSubPhysRegRef(RHS, LHS.reg, true, JoinedCopies))
         return false;
     } else {
       for (const unsigned* SR = tri_->getSubRegisters(LHS.reg); *SR; ++SR)
@@ -2222,7 +2233,7 @@
     if (LHS.containsOneValue() &&
         li_->getApproximateInstructionCount(LHS) <= 10) {
       // Perform a more exhaustive check for some common cases.
-      if (li_->conflictsWithPhysRegRef(LHS, RHS.reg, false, JoinedCopies))
+      if (li_->conflictsWithSubPhysRegRef(LHS, RHS.reg, false, JoinedCopies))
         return false;
     } else {
       for (const unsigned* SR = tri_->getSubRegisters(RHS.reg); *SR; ++SR)
diff --git a/test/CodeGen/X86/phys_subreg_coalesce-3.ll b/test/CodeGen/X86/phys_subreg_coalesce-3.ll
new file mode 100644
index 0000000..f23669e
--- /dev/null
+++ b/test/CodeGen/X86/phys_subreg_coalesce-3.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
+; rdar://5571034
+
+define void @foo(i32* nocapture %quadrant, i32* nocapture %ptr, i32 %bbSize, i32 %bbStart, i32 %shifts) nounwind ssp {
+; CHECK: foo:
+entry:
+  %j.03 = add i32 %bbSize, -1                     ; <i32> [#uses=2]
+  %0 = icmp sgt i32 %j.03, -1                     ; <i1> [#uses=1]
+  br i1 %0, label %bb.nph, label %return
+
+bb.nph:                                           ; preds = %entry
+  %tmp9 = add i32 %bbStart, %bbSize               ; <i32> [#uses=1]
+  %tmp10 = add i32 %tmp9, -1                      ; <i32> [#uses=1]
+  br label %bb
+
+bb:                                               ; preds = %bb, %bb.nph
+; CHECK: %bb
+; CHECK-NOT: movb {{.*}}l, %cl
+; CHECK: sarl %cl
+  %indvar = phi i32 [ 0, %bb.nph ], [ %indvar.next, %bb ] ; <i32> [#uses=3]
+  %j.06 = sub i32 %j.03, %indvar                  ; <i32> [#uses=1]
+  %tmp11 = sub i32 %tmp10, %indvar                ; <i32> [#uses=1]
+  %scevgep = getelementptr i32* %ptr, i32 %tmp11  ; <i32*> [#uses=1]
+  %1 = load i32* %scevgep, align 4                ; <i32> [#uses=1]
+  %2 = ashr i32 %j.06, %shifts                    ; <i32> [#uses=1]
+  %3 = and i32 %2, 65535                          ; <i32> [#uses=1]
+  %4 = getelementptr inbounds i32* %quadrant, i32 %1 ; <i32*> [#uses=1]
+  store i32 %3, i32* %4, align 4
+  %indvar.next = add i32 %indvar, 1               ; <i32> [#uses=2]
+  %exitcond = icmp eq i32 %indvar.next, %bbSize   ; <i1> [#uses=1]
+  br i1 %exitcond, label %return, label %bb
+
+return:                                           ; preds = %bb, %entry
+  ret void
+}