Inflate register classes after coalescing. Coalescing can remove copy-like instructions with sub-register operands that constrained the register class. Examples are: x86: GR32_ABCD:sub_8bit_hi -> GR32; arm: DPR_VFP2:ssub0 -> DPR. Recompute the register class of any virtual registers that are used by fewer instructions after coalescing. This affects code generation for the Cortex-A8 where we use NEON instructions for f32 operations, cf. fp_convert.ll: vadd.f32 d16, d1, d0; vcvt.s32.f32 d0, d16. The register allocator is now free to use d16 for the temporary, and that comes first in the allocation order because it doesn't interfere with any s-registers. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@137133 91177308-0d34-0410-b5e6-96231b3b80d8

commit: 4a74b3b933e2944ff313dc5d24da6f9e8ec4c1c4 [log] [tgz]
author: Jakob Stoklund Olesen <stoklund@2pi.dk> Tue Aug 09 18:19:41 2011 +0000
committer: Jakob Stoklund Olesen <stoklund@2pi.dk> Tue Aug 09 18:19:41 2011 +0000
tree: 93ee9fb3cd121c4c46e3644a7597784dc185a785
parent: e2406dfd8940b3178bf452d89fed2a5df7a63043 [diff]
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index d2087f9..c07970d 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp

@@ -55,6 +55,7 @@
 STATISTIC(NumReMats   , "Number of instructions re-materialized");
 STATISTIC(numPeep     , "Number of identity moves eliminated after coalescing");
 STATISTIC(numAborts   , "Number of times interval joining aborted");
+STATISTIC(NumInflated , "Number of register classes inflated");
 
 static cl::opt<bool>
 EnableJoining("join-liveintervals",
@@ -1852,7 +1853,7 @@
 
   // Perform a final pass over the instructions and compute spill weights
   // and remove identity moves.
-  SmallVector<unsigned, 4> DeadDefs;
+  SmallVector<unsigned, 4> DeadDefs, InflateRegs;
   for (MachineFunction::iterator mbbi = MF->begin(), mbbe = MF->end();
        mbbi != mbbe; ++mbbi) {
     MachineBasicBlock* mbb = mbbi;
@@ -1864,6 +1865,16 @@
         bool DoDelete = true;
         assert(MI->isCopyLike() && "Unrecognized copy instruction");
         unsigned SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg();
+        unsigned DstReg = MI->getOperand(0).getReg();
+
+        // Collect candidates for register class inflation.
+        if (TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+            RegClassInfo.isProperSubClass(MRI->getRegClass(SrcReg)))
+          InflateRegs.push_back(SrcReg);
+        if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
+            RegClassInfo.isProperSubClass(MRI->getRegClass(DstReg)))
+          InflateRegs.push_back(DstReg);
+
         if (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
             MI->getNumOperands() > 2)
           // Do not delete extract_subreg, insert_subreg of physical
@@ -1905,8 +1916,12 @@
           unsigned Reg = MO.getReg();
           if (!Reg)
             continue;
-          if (TargetRegisterInfo::isVirtualRegister(Reg))
+          if (TargetRegisterInfo::isVirtualRegister(Reg)) {
             DeadDefs.push_back(Reg);
+            // Remat may also enable register class inflation.
+            if (RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)))
+              InflateRegs.push_back(Reg);
+          }
           if (MO.isDead())
             continue;
           if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
@@ -1954,6 +1969,24 @@
     }
   }
 
+  // After deleting a lot of copies, register classes may be less constrained.
+  // Removing sub-register operands may allow GR32_ABCD -> GR32 and
+  // DPR_VFP2 -> DPR inflation.
+  array_pod_sort(InflateRegs.begin(), InflateRegs.end());
+  InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()),
+                    InflateRegs.end());
+  DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size() << " regs.\n");
+  for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i) {
+    unsigned Reg = InflateRegs[i];
+    if (MRI->reg_nodbg_empty(Reg))
+      continue;
+    if (MRI->recomputeRegClass(Reg, *TM)) {
+      DEBUG(dbgs() << PrintReg(Reg) << " inflated to "
+                   << MRI->getRegClass(Reg)->getName() << '\n');
+      ++NumInflated;
+    }
+  }
+
   DEBUG(dump());
   DEBUG(LDV->dump());
   if (VerifyCoalescing)

diff --git a/test/CodeGen/ARM/fabss.ll b/test/CodeGen/ARM/fabss.ll
index 51efe51..45c322d 100644
--- a/test/CodeGen/ARM/fabss.ll
+++ b/test/CodeGen/ARM/fabss.ll

@@ -22,6 +22,8 @@
 ; NFP0: 	vabs.f32	s1, s1
 
 ; CORTEXA8: test:
-; CORTEXA8: 	vabs.f32	d1, d1
+; CORTEXA8:     vadd.f32        [[D1:d[0-9]+]]
+; CORTEXA8: 	vabs.f32	{{d[0-9]+}}, [[D1]]
+
 ; CORTEXA9: test:
 ; CORTEXA9: 	vabs.f32	s{{.}}, s{{.}}

diff --git a/test/CodeGen/ARM/fp_convert.ll b/test/CodeGen/ARM/fp_convert.ll
index 86c06f1..7002cec 100644
--- a/test/CodeGen/ARM/fp_convert.ll
+++ b/test/CodeGen/ARM/fp_convert.ll

@@ -7,7 +7,8 @@
 ; VFP2: test1:
 ; VFP2: vcvt.s32.f32 s{{.}}, s{{.}}
 ; NEON: test1:
-; NEON: vcvt.s32.f32 d0, d0
+; NEON: vadd.f32 [[D0:d[0-9]+]]
+; NEON: vcvt.s32.f32 d0, [[D0]]
 entry:
         %0 = fadd float %a, %b
         %1 = fptosi float %0 to i32
@@ -18,7 +19,8 @@
 ; VFP2: test2:
 ; VFP2: vcvt.u32.f32 s{{.}}, s{{.}}
 ; NEON: test2:
-; NEON: vcvt.u32.f32 d0, d0
+; NEON: vadd.f32 [[D0:d[0-9]+]]
+; NEON: vcvt.u32.f32 d0, [[D0]]
 entry:
         %0 = fadd float %a, %b
         %1 = fptoui float %0 to i32
commit	4a74b3b933e2944ff313dc5d24da6f9e8ec4c1c4	[log] [tgz]
author	Jakob Stoklund Olesen <stoklund@2pi.dk>	Tue Aug 09 18:19:41 2011 +0000
committer	Jakob Stoklund Olesen <stoklund@2pi.dk>	Tue Aug 09 18:19:41 2011 +0000
tree	93ee9fb3cd121c4c46e3644a7597784dc185a785
parent	e2406dfd8940b3178bf452d89fed2a5df7a63043 [diff]