[AMDGPU] Increased vector length for global/constant loads.
Summary: GCN ISA supports instructions that can read 16 consecutive dwords from memory through the scalar data cache;
loadstoreVectorizer should take advantage of the wider vector length and pack 16/8 elements of dwords/quadwords.
Author: FarhanaAleen
Reviewed By: rampitec
Subscribers: llvm-commits, AMDGPU
Differential Revision: https://reviews.llvm.org/D44179
llvm-svn: 326910
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 6d6ab08..4292575 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -234,12 +234,38 @@
return 32;
}
+unsigned AMDGPUTTIImpl::getLoadVectorFactor(unsigned VF, unsigned LoadSize,
+ unsigned ChainSizeInBytes,
+ VectorType *VecTy) const {
+ unsigned VecRegBitWidth = VF * LoadSize;
+ if (VecRegBitWidth > 128 && VecTy->getScalarSizeInBits() < 32)
+ // TODO: Support element-size less than 32bit?
+ return 128 / LoadSize;
+
+ return VF;
+}
+
+unsigned AMDGPUTTIImpl::getStoreVectorFactor(unsigned VF, unsigned StoreSize,
+ unsigned ChainSizeInBytes,
+ VectorType *VecTy) const {
+ unsigned VecRegBitWidth = VF * StoreSize;
+ if (VecRegBitWidth > 128)
+ return 128 / StoreSize;
+
+ return VF;
+}
+
unsigned AMDGPUTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
AMDGPUAS AS = ST->getAMDGPUAS();
if (AddrSpace == AS.GLOBAL_ADDRESS ||
AddrSpace == AS.CONSTANT_ADDRESS ||
- AddrSpace == AS.CONSTANT_ADDRESS_32BIT ||
- AddrSpace == AS.FLAT_ADDRESS)
+ AddrSpace == AS.CONSTANT_ADDRESS_32BIT) {
+ if (ST->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
+ return 128;
+ return 512;
+ }
+
+ if (AddrSpace == AS.FLAT_ADDRESS)
return 128;
if (AddrSpace == AS.LOCAL_ADDRESS ||
AddrSpace == AS.REGION_ADDRESS)
@@ -250,8 +276,8 @@
if (ST->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS &&
(AddrSpace == AS.PARAM_D_ADDRESS ||
AddrSpace == AS.PARAM_I_ADDRESS ||
- (AddrSpace >= AS.CONSTANT_BUFFER_0 &&
- AddrSpace <= AS.CONSTANT_BUFFER_15)))
+ (AddrSpace >= AS.CONSTANT_BUFFER_0 &&
+ AddrSpace <= AS.CONSTANT_BUFFER_15)))
return 128;
llvm_unreachable("unhandled address space");
}