blob: 69dc051355de846ed2e0fd9b45458a5a8b440e78 [file] [log] [blame]
Artem Belevich7cb25c92015-09-10 18:24:23 +00001// Test for linking with CUDA's libdevice as outlined in
2// http://llvm.org/docs/NVPTXUsage.html#linking-with-libdevice
3//
4// REQUIRES: nvptx-registered-target
5//
6// Prepare bitcode file to link with
Justin Lebar19b648ea2016-03-30 23:45:38 +00007// RUN: %clang_cc1 -triple nvptx-unknown-cuda -emit-llvm-bc \
8// RUN: -disable-llvm-passes -o %t.bc %S/Inputs/device-code.ll
9// RUN: %clang_cc1 -triple nvptx-unknown-cuda -emit-llvm-bc \
10// RUN: -disable-llvm-passes -o %t-2.bc %S/Inputs/device-code-2.ll
Artem Belevich7cb25c92015-09-10 18:24:23 +000011//
12// Make sure function in device-code gets linked in and internalized.
13// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
Matt Arsenaulta13746b2018-08-20 18:16:48 +000014// RUN: -mlink-builtin-bitcode %t.bc -emit-llvm \
15// RUN: -disable-llvm-passes -o - %s \
16// RUN: | FileCheck %s -check-prefix CHECK-IR
17
18// Make sure legacy flag name works
19// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
Artem Belevich5d40ae32015-10-27 17:56:59 +000020// RUN: -mlink-cuda-bitcode %t.bc -emit-llvm \
Artem Belevich7cb25c92015-09-10 18:24:23 +000021// RUN: -disable-llvm-passes -o - %s \
22// RUN: | FileCheck %s -check-prefix CHECK-IR
23//
Artem Belevich5d40ae32015-10-27 17:56:59 +000024// Make sure we can link two bitcode files.
25// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
Matt Arsenaulta13746b2018-08-20 18:16:48 +000026// RUN: -mlink-builtin-bitcode %t.bc -mlink-builtin-bitcode %t-2.bc \
Artem Belevich5d40ae32015-10-27 17:56:59 +000027// RUN: -emit-llvm -disable-llvm-passes -o - %s \
28// RUN: | FileCheck %s -check-prefix CHECK-IR -check-prefix CHECK-IR-2
29//
Artem Belevich7cb25c92015-09-10 18:24:23 +000030// Make sure function in device-code gets linked but is not internalized
31// without -fcuda-uses-libdevice
32// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
33// RUN: -mlink-bitcode-file %t.bc -emit-llvm \
34// RUN: -disable-llvm-passes -o - %s \
35// RUN: | FileCheck %s -check-prefix CHECK-IR-NLD
36//
37// Make sure NVVMReflect pass is enabled in NVPTX back-end.
38// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
Matt Arsenaulta13746b2018-08-20 18:16:48 +000039// RUN: -mlink-builtin-bitcode %t.bc -S -o /dev/null %s \
Eli Friedman01d349b2018-04-12 22:21:36 +000040// RUN: -mllvm -debug-pass=Structure 2>&1 \
Artem Belevich7cb25c92015-09-10 18:24:23 +000041// RUN: | FileCheck %s -check-prefix CHECK-REFLECT
42
43#include "Inputs/cuda.h"
44
45__device__ float device_mul_or_add(float a, float b);
46extern "C" __device__ double __nv_sin(double x);
47extern "C" __device__ double __nv_exp(double x);
48
49// CHECK-IR-LABEL: define void @_Z26should_not_be_internalizedPf(
50// CHECK-PTX-LABEL: .visible .func _Z26should_not_be_internalizedPf(
51__device__ void should_not_be_internalized(float *data) {}
52
53// Make sure kernel call has not been internalized.
54// CHECK-IR-LABEL: define void @_Z6kernelPfS_
55// CHECK-PTX-LABEL: .visible .entry _Z6kernelPfS_(
56__global__ __attribute__((used)) void kernel(float *out, float *in) {
57 *out = device_mul_or_add(in[0], in[1]);
58 *out += __nv_exp(__nv_sin(*out));
59 should_not_be_internalized(out);
60}
61
62// Make sure device_mul_or_add() is present in IR, is internal and
63// calls __nvvm_reflect().
64// CHECK-IR-LABEL: define internal float @_Z17device_mul_or_addff(
65// CHECK-IR-NLD-LABEL: define float @_Z17device_mul_or_addff(
66// CHECK-IR: call i32 @__nvvm_reflect
67// CHECK-IR: ret float
68
Artem Belevich5d40ae32015-10-27 17:56:59 +000069// Make sure we've linked in and internalized only needed functions
70// from the second bitcode file.
71// CHECK-IR-2-LABEL: define internal double @__nv_sin
72// CHECK-IR-2-LABEL: define internal double @__nv_exp
73// CHECK-IR-2-NOT: double @__unused
74
Artem Belevich7cb25c92015-09-10 18:24:23 +000075// Verify that NVVMReflect pass is among the passes run by NVPTX back-end.
76// CHECK-REFLECT: Replace occurrences of __nvvm_reflect() calls with 0/1