[HIP][AMDGPU] expand printf when compiling HIP to AMDGPU

Summary:
This change implements the expansion in two parts:
- Add a utility function emitAMDGPUPrintfCall() in LLVM.
- Invoke the above function from Clang CodeGen, when processing a HIP
  program for the AMDGPU target.

The printf expansion has undefined behaviour if the format string is
not a compile-time constant. To rule this out, the HIP ToolChain now
emits -Werror=format-nonliteral, which is a sufficient condition.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D71365
diff --git a/clang/lib/CodeGen/CGGPUBuiltin.cpp b/clang/lib/CodeGen/CGGPUBuiltin.cpp
index d7e2676..bccce7d 100644
--- a/clang/lib/CodeGen/CGGPUBuiltin.cpp
+++ b/clang/lib/CodeGen/CGGPUBuiltin.cpp
@@ -16,6 +16,7 @@
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Transforms/Utils/AMDGPUEmitPrintf.h"
 
 using namespace clang;
 using namespace CodeGen;
@@ -120,3 +121,36 @@
   return RValue::get(Builder.CreateCall(
       VprintfFunc, {Args[0].getRValue(*this).getScalarVal(), BufferPtr}));
 }
+
+RValue
+CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E,
+                                                ReturnValueSlot ReturnValue) {
+  assert(getTarget().getTriple().getArch() == llvm::Triple::amdgcn);
+  assert(E->getBuiltinCallee() == Builtin::BIprintf ||
+         E->getBuiltinCallee() == Builtin::BI__builtin_printf);
+  assert(E->getNumArgs() >= 1); // printf always has at least one arg.
+
+  const FunctionDecl *Callee = E->getDirectCallee();
+  CallArgList EmittedArgs;
+  EmitCallArgs(EmittedArgs, Callee->getType()->getAs<FunctionProtoType>(),
+               E->arguments(), Callee, /* ParamsToSkip = */ 0);
+
+  SmallVector<llvm::Value *, 8> ScalarArgs;
+  for (auto CurArg : EmittedArgs) {
+    RValue ArgRV = CurArg.getRValue(*this);
+    if (ArgRV.isScalar()) {
+      ScalarArgs.push_back(ArgRV.getScalarVal());
+      continue;
+    }
+    // We don't know how to emit non-scalar varargs.
+    CGM.ErrorUnsupported(E, "non-scalar arg to printf");
+    return RValue::get(llvm::ConstantInt::get(IntTy, -1));
+  }
+
+  // Expand with a plain IRBuilder, then move Builder to where it stopped.
+  llvm::IRBuilder<> IRB(Builder.GetInsertBlock(), Builder.GetInsertPoint());
+  IRB.SetCurrentDebugLocation(Builder.getCurrentDebugLocation());
+  llvm::Value *Printf = llvm::emitAMDGPUPrintfCall(IRB, ScalarArgs);
+  Builder.SetInsertPoint(IRB.GetInsertBlock(), IRB.GetInsertPoint());
+  return RValue::get(Printf);
+}