[CUDA] Postprocess bitcode linked in during device-side CUDA compilation. Link in and internalize the symbols we need from supplied bitcode library. Differential Revision: http://reviews.llvm.org/D11664 llvm-svn: 247317

commit: 7cb25c9b69c9095ea268b13c4b75d92e3f2ac3e9 [log] [tgz]
author: Artem Belevich <tra@google.com> Thu Sep 10 18:24:23 2015 +0000
committer: Artem Belevich <tra@google.com> Thu Sep 10 18:24:23 2015 +0000
tree: a673f37406032a92c70b87378a9ed24350c149e7
parent: 7ad7ae1fac53b9f5743eb55808a0d39f63295dff [diff] [blame]
diff --git a/clang/test/CodeGenCUDA/link-device-bitcode.cu b/clang/test/CodeGenCUDA/link-device-bitcode.cu
new file mode 100644
index 0000000..45e5bcf
--- /dev/null
+++ b/clang/test/CodeGenCUDA/link-device-bitcode.cu

@@ -0,0 +1,56 @@
+// Test for linking with CUDA's libdevice as outlined in
+// http://llvm.org/docs/NVPTXUsage.html#linking-with-libdevice
+//
+// REQUIRES: nvptx-registered-target
+//
+// Prepare bitcode file to link with
+// RUN: %clang_cc1 -triple nvptx-unknown-cuda -emit-llvm-bc -o %t.bc \
+// RUN:    %S/Inputs/device-code.ll
+//
+// Make sure the function from device-code.ll gets linked in and internalized.
+// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
+// RUN:    -mlink-bitcode-file %t.bc -fcuda-uses-libdevice -emit-llvm \
+// RUN:    -disable-llvm-passes -o - %s \
+// RUN:    | FileCheck %s -check-prefix CHECK-IR
+//
+// Make sure the function from device-code.ll gets linked in but is not
+// internalized without -fcuda-uses-libdevice.
+// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
+// RUN:    -mlink-bitcode-file %t.bc -emit-llvm \
+// RUN:    -disable-llvm-passes -o - %s \
+// RUN:    | FileCheck %s -check-prefix CHECK-IR-NLD
+//
+// Make sure NVVMReflect pass is enabled in NVPTX back-end.
+// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
+// RUN:    -mlink-bitcode-file %t.bc -fcuda-uses-libdevice -S -o /dev/null %s \
+// RUN:    -backend-option -debug-pass=Structure 2>&1 \
+// RUN:    | FileCheck %s -check-prefix CHECK-REFLECT
+
+#include "Inputs/cuda.h"
+
+__device__ float device_mul_or_add(float a, float b);
+extern "C" __device__ double __nv_sin(double x);
+extern "C" __device__ double __nv_exp(double x);
+
+// CHECK-IR-LABEL: define void @_Z26should_not_be_internalizedPf(
+// CHECK-PTX-LABEL: .visible .func _Z26should_not_be_internalizedPf(
+__device__ void should_not_be_internalized(float *data) {}  // Empty body: only the definition line is matched by the checks above.
+
+// Make sure the kernel itself has not been internalized.
+// CHECK-IR-LABEL: define void @_Z6kernelPfS_
+// CHECK-PTX-LABEL: .visible .entry _Z6kernelPfS_(
+__global__ __attribute__((used)) void kernel(float *out, float *in) {
+  *out = device_mul_or_add(in[0], in[1]);  // Declared above, not defined in this file.
+  *out += __nv_exp(__nv_sin(*out));  // extern "C" double helpers; sum is stored back as float.
+  should_not_be_internalized(out);  // Calls the empty __device__ function defined above.
+}
+
+// Make sure device_mul_or_add() is present in IR, is internal and
+// calls __nvvm_reflect().
+// CHECK-IR-LABEL: define internal float @_Z17device_mul_or_addff(
+// CHECK-IR-NLD-LABEL: define float @_Z17device_mul_or_addff(
+// CHECK-IR: call i32 @__nvvm_reflect
+// CHECK-IR: ret float
+
+// Verify that NVVMReflect pass is among the passes run by NVPTX back-end.
+// CHECK-REFLECT: Replace occurrences of __nvvm_reflect() calls with 0/1
commit	7cb25c9b69c9095ea268b13c4b75d92e3f2ac3e9	[log] [tgz]
author	Artem Belevich <tra@google.com>	Thu Sep 10 18:24:23 2015 +0000
committer	Artem Belevich <tra@google.com>	Thu Sep 10 18:24:23 2015 +0000
tree	a673f37406032a92c70b87378a9ed24350c149e7
parent	7ad7ae1fac53b9f5743eb55808a0d39f63295dff [diff] [blame]