R600: Use .AMDGPU.config section to emit stacksize

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@180124 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp
index e683d75..9271b39 100644
--- a/lib/Target/R600/R600ControlFlowFinalizer.cpp
+++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp
@@ -166,6 +166,22 @@
     }
   }
 
+  unsigned getHWStackSize(unsigned StackSubEntry, bool hasPush) const {
+    switch (ST.device()->getGeneration()) {
+    case AMDGPUDeviceInfo::HD4XXX:
+      if (hasPush)
+        StackSubEntry += 2;
+      break;
+    case AMDGPUDeviceInfo::HD5XXX:
+      if (hasPush)
+        StackSubEntry ++;
+    case AMDGPUDeviceInfo::HD6XXX:
+      StackSubEntry += 2;
+      break;
+    }
+    return (StackSubEntry + 3)/4; // Need ceil value of StackSubEntry/4
+  }
+
 public:
   R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID),
     TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())),
@@ -180,6 +196,7 @@
   virtual bool runOnMachineFunction(MachineFunction &MF) {
     unsigned MaxStack = 0;
     unsigned CurrentStack = 0;
+    bool hasPush;
     for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
         ++MB) {
       MachineBasicBlock &MBB = *MB;
@@ -207,6 +224,7 @@
         case AMDGPU::CF_ALU_PUSH_BEFORE:
           CurrentStack++;
           MaxStack = std::max(MaxStack, CurrentStack);
+          hasPush = true;
         case AMDGPU::CF_ALU:
         case AMDGPU::EG_ExportBuf:
         case AMDGPU::EG_ExportSwz:
@@ -218,7 +236,7 @@
           CfCount++;
           break;
         case AMDGPU::WHILELOOP: {
-          CurrentStack++;
+          CurrentStack+=4;
           MaxStack = std::max(MaxStack, CurrentStack);
           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
               getHWInstrDesc(CF_WHILE_LOOP))
@@ -232,7 +250,7 @@
           break;
         }
         case AMDGPU::ENDLOOP: {
-          CurrentStack--;
+          CurrentStack-=4;
           std::pair<unsigned, std::set<MachineInstr *> > Pair =
               LoopStack.back();
           LoopStack.pop_back();
@@ -321,9 +339,7 @@
           break;
         }
       }
-      BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
-          TII->get(AMDGPU::STACK_SIZE))
-          .addImm(MaxStack);
+      MFI->StackSize = getHWStackSize(MaxStack, hasPush);
     }
 
     return false;