[AMDGPU] Fixed occupancy calculation for gfx10
Differential Revision: https://reviews.llvm.org/D65010
llvm-svn: 366616
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 1eb9b83..716c387 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -591,25 +591,12 @@
}
unsigned GCNSubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
- if (VGPRs <= 24)
- return 10;
- if (VGPRs <= 28)
- return 9;
- if (VGPRs <= 32)
- return 8;
- if (VGPRs <= 36)
- return 7;
- if (VGPRs <= 40)
- return 6;
- if (VGPRs <= 48)
- return 5;
- if (VGPRs <= 64)
- return 4;
- if (VGPRs <= 84)
- return 3;
- if (VGPRs <= 128)
- return 2;
- return 1;
+ unsigned MaxWaves = getMaxWavesPerEU();
+ unsigned Granule = getVGPRAllocGranule();
+ if (VGPRs < Granule)
+ return MaxWaves;
+ unsigned RoundedRegs = ((VGPRs + Granule - 1) / Granule) * Granule;
+ return std::min(getTotalNumVGPRs() / RoundedRegs, MaxWaves);
}
unsigned GCNSubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 78c3b82..1f30d76 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -884,7 +884,7 @@
/// \returns Maximum number of waves per execution unit supported by the
/// subtarget without any kind of limitation.
unsigned getMaxWavesPerEU() const {
- return AMDGPU::IsaInfo::getMaxWavesPerEU();
+ return AMDGPU::IsaInfo::getMaxWavesPerEU(this);
}
/// \returns Number of waves per work group supported by the subtarget and
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index e90f40e..ef50d37 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -241,7 +241,7 @@
}
unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI) {
- return getMaxWavesPerEU() * getEUsPerCU(STI);
+ return getMaxWavesPerEU(STI) * getEUsPerCU(STI);
}
unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
@@ -253,9 +253,11 @@
return 1;
}
-unsigned getMaxWavesPerEU() {
+unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
// FIXME: Need to take scratch memory into account.
- return 10;
+ if (!isGFX10(*STI))
+ return 10;
+ return 20;
}
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
@@ -317,7 +319,7 @@
if (Version.Major >= 10)
return 0;
- if (WavesPerEU >= getMaxWavesPerEU())
+ if (WavesPerEU >= getMaxWavesPerEU(STI))
return 0;
unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
@@ -394,17 +396,19 @@
}
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
- return 256;
+ if (!isGFX10(*STI))
+ return 256;
+ return STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1024 : 512;
}
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
- return getTotalNumVGPRs(STI);
+ return 256;
}
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
assert(WavesPerEU != 0);
- if (WavesPerEU >= getMaxWavesPerEU())
+ if (WavesPerEU >= getMaxWavesPerEU(STI))
return 0;
unsigned MinNumVGPRs =
alignDown(getTotalNumVGPRs(STI) / (WavesPerEU + 1),
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 209ef7e..590df20 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -94,7 +94,7 @@
/// \returns Maximum number of waves per execution unit for given subtarget \p
/// STI without any kind of limitation.
-unsigned getMaxWavesPerEU();
+unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);
/// \returns Maximum number of waves per execution unit for given subtarget \p
/// STI and limited by given \p FlatWorkGroupSize.