AMDGPU/SI: Extend promoting alloca to vector to arrays of up to 16 elements
Summary:
This patch extends the promotion of alloca to vector to the arrays of up to 16 elements. Also we introduce
an option, -disable-promote-alloca-to-vector, to switch promotion to vector off, if needed.
Reviewers:
arsenm
Differential Revision:
https://reviews.llvm.org/D33559
llvm-svn: 325372
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 419d481..cf8469f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -65,6 +65,11 @@
namespace {
+static cl::opt<bool> DisablePromoteAllocaToVector(
+ "disable-promote-alloca-to-vector",
+ cl::desc("Disable promote alloca to vector"),
+ cl::init(false));
+
// FIXME: This can create globals so should be a module pass.
class AMDGPUPromoteAlloca : public FunctionPass {
private:
@@ -337,6 +342,12 @@
}
static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
+
+ if (DisablePromoteAllocaToVector) {
+ DEBUG(dbgs() << " Promotion alloca to vector is disabled\n");
+ return false;
+ }
+
ArrayType *AllocaTy = dyn_cast<ArrayType>(Alloca->getAllocatedType());
DEBUG(dbgs() << "Alloca candidate for vectorization\n");
@@ -346,7 +357,7 @@
// FIXME: We also reject alloca's of the form [ 2 x [ 2 x i32 ]] or equivalent. Potentially these
// could also be promoted but we don't currently handle this case
if (!AllocaTy ||
- AllocaTy->getNumElements() > 4 ||
+ AllocaTy->getNumElements() > 16 ||
AllocaTy->getNumElements() < 2 ||
!VectorType::isValidElementType(AllocaTy->getElementType())) {
DEBUG(dbgs() << " Cannot convert type to vector\n");