AMDGPU: Allow vectorization of packed types
llvm-svn: 305844
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 0d6689b..88245b0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -184,9 +184,9 @@
}
}
-unsigned AMDGPUTTIImpl::getNumberOfRegisters(bool Vec) {
- if (Vec)
- return 0;
+unsigned AMDGPUTTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
+ // The concept of vector registers doesn't really exist. Some packed vector
+ // operations operate on the normal 32-bit registers.
// Number of VGPRs on SI.
if (ST->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)
@@ -195,8 +195,18 @@
return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
}
+unsigned AMDGPUTTIImpl::getNumberOfRegisters(bool Vec) const {
+ // Callers treat this as the number of registers to fill when vectorizing /
+ // interleaving loops, so under-report it to keep them from using them all.
+ return getHardwareNumberOfRegisters(Vec) / 8;
+}
+
unsigned AMDGPUTTIImpl::getRegisterBitWidth(bool Vector) const {
- return Vector ? 0 : 32;
+ return 32; // Packed vector ops use the normal 32-bit registers too.
+}
+
+unsigned AMDGPUTTIImpl::getMinVectorRegisterBitWidth() const {
+ return 32; // Packed vector ops operate on the normal 32-bit registers.
}
unsigned AMDGPUTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
@@ -247,11 +257,11 @@
unsigned AMDGPUTTIImpl::getMaxInterleaveFactor(unsigned VF) {
// Disable unrolling if the loop is not vectorized.
+ // TODO: Enable unrolling of non-vectorized loops again.
if (VF == 1)
return 1;
- // Semi-arbitrary large amount.
- return 64;
+ return 8;
}
int AMDGPUTTIImpl::getArithmeticInstrCost(
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index a60b1bb..485e204 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -75,8 +75,10 @@
return TTI::PSK_FastHardware;
}
- unsigned getNumberOfRegisters(bool Vector);
- unsigned getRegisterBitWidth(bool Vector) const;
+ unsigned getHardwareNumberOfRegisters(bool Vector) const;
+ unsigned getNumberOfRegisters(bool Vector) const;
+ unsigned getRegisterBitWidth(bool Vector) const;
+ unsigned getMinVectorRegisterBitWidth() const;
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,