AMDGPU: Always emit amdgpu-flat-work-group-size
The backend default maximum should be the hardware maximum, so the
frontend should set the implementation defined default maximum.
llvm-svn: 370101
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index 1124154..231a20c 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -7912,8 +7912,11 @@
const auto *ReqdWGS = M.getLangOpts().OpenCL ?
FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr;
- if (((M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>()) ||
- (M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>())) &&
+
+ const bool IsOpenCLKernel = M.getLangOpts().OpenCL &&
+ FD->hasAttr<OpenCLKernelAttr>();
+ if ((IsOpenCLKernel ||
+ (M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>())) &&
(M.getTriple().getOS() == llvm::Triple::AMDHSA))
F->addFnAttr("amdgpu-implicitarg-num-bytes", "56");
@@ -7939,6 +7942,9 @@
F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
} else
assert(Max == 0 && "Max must be zero");
+ } else if (IsOpenCLKernel) {
+ // By default, restrict the maximum size to 256.
+ F->addFnAttr("amdgpu-flat-work-group-size", "1,256");
}
if (const auto *Attr = FD->getAttr<AMDGPUWavesPerEUAttr>()) {