AMDGPU: Make hidden argument metadata consistent with
amdgpu-implicitarg-num-bytes attribute
Differential Revision: https://reviews.llvm.org/D49096
llvm-svn: 336697
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.cpp
index d68453b..0f38dc0 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.cpp
@@ -14,7 +14,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUHSAMetadataStreamer.h"
-#include "AMDGPU.h"
+#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Module.h"
@@ -255,36 +255,7 @@
for (auto &Arg : Func.args())
emitKernelArg(Arg);
- // TODO: What about other languages?
- if (!Func.getParent()->getNamedMetadata("opencl.ocl.version"))
- return;
-
- auto &DL = Func.getParent()->getDataLayout();
- auto Int64Ty = Type::getInt64Ty(Func.getContext());
-
- emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetX);
- emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetY);
- emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ);
-
- auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
- AMDGPUASI.GLOBAL_ADDRESS);
-
- // Emit "printf buffer" argument if printf is used, otherwise emit dummy
- // "none" argument.
- if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
- emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer);
- else
- emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
-
- // Emit "default queue" and "completion action" arguments if enqueue kernel is
- // used, otherwise emit dummy "none" arguments.
- if (Func.hasFnAttribute("calls-enqueue-kernel")) {
- emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenDefaultQueue);
- emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenCompletionAction);
- } else {
- emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
- emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
- }
+ emitHiddenKernelArgs(Func);
}
void MetadataStreamer::emitKernelArg(const Argument &Arg) {
@@ -378,6 +349,48 @@
}
}
+void MetadataStreamer::emitHiddenKernelArgs(const Function &Func) {
+ int HiddenArgNumBytes =
+ getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
+
+ if (!HiddenArgNumBytes)
+ return;
+
+ auto &DL = Func.getParent()->getDataLayout();
+ auto Int64Ty = Type::getInt64Ty(Func.getContext());
+
+ if (HiddenArgNumBytes >= 8)
+ emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetX);
+ if (HiddenArgNumBytes >= 16)
+ emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetY);
+ if (HiddenArgNumBytes >= 24)
+ emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ);
+
+ auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
+ AMDGPUASI.GLOBAL_ADDRESS);
+
+ // Emit "printf buffer" argument if printf is used, otherwise emit dummy
+ // "none" argument.
+ if (HiddenArgNumBytes >= 32) {
+ if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
+ emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer);
+ else
+ emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
+ }
+
+ // Emit "default queue" and "completion action" arguments if enqueue kernel is
+ // used, otherwise emit dummy "none" arguments.
+ if (HiddenArgNumBytes >= 48) {
+ if (Func.hasFnAttribute("calls-enqueue-kernel")) {
+ emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenDefaultQueue);
+ emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenCompletionAction);
+ } else {
+ emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
+ emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
+ }
+ }
+}
+
void MetadataStreamer::begin(const Module &Mod) {
AMDGPUASI = getAMDGPUAS(Mod);
emitVersion();