Artem Belevich | 7cb25c9 | 2015-09-10 18:24:23 +0000 | [diff] [blame] | 1 | // Test for linking with CUDA's libdevice as outlined in |
| 2 | // http://llvm.org/docs/NVPTXUsage.html#linking-with-libdevice |
| 3 | // |
| 4 | // REQUIRES: nvptx-registered-target |
| 5 | // |
| 6 | // Prepare bitcode file to link with |
Justin Lebar | 19b648ea | 2016-03-30 23:45:38 +0000 | [diff] [blame] | 7 | // RUN: %clang_cc1 -triple nvptx-unknown-cuda -emit-llvm-bc \ |
| 8 | // RUN: -disable-llvm-passes -o %t.bc %S/Inputs/device-code.ll |
| 9 | // RUN: %clang_cc1 -triple nvptx-unknown-cuda -emit-llvm-bc \ |
| 10 | // RUN: -disable-llvm-passes -o %t-2.bc %S/Inputs/device-code-2.ll |
Artem Belevich | 7cb25c9 | 2015-09-10 18:24:23 +0000 | [diff] [blame] | 11 | // |
| 12 | // Make sure function in device-code gets linked in and internalized. |
| 13 | // RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \ |
Matt Arsenault | a13746b | 2018-08-20 18:16:48 +0000 | [diff] [blame] | 14 | // RUN: -mlink-builtin-bitcode %t.bc -emit-llvm \ |
| 15 | // RUN: -disable-llvm-passes -o - %s \ |
| 16 | // RUN: | FileCheck %s -check-prefix CHECK-IR |
| 17 | |
| 18 | // Make sure legacy flag name works |
| 19 | // RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \ |
Artem Belevich | 5d40ae3 | 2015-10-27 17:56:59 +0000 | [diff] [blame] | 20 | // RUN: -mlink-cuda-bitcode %t.bc -emit-llvm \ |
Artem Belevich | 7cb25c9 | 2015-09-10 18:24:23 +0000 | [diff] [blame] | 21 | // RUN: -disable-llvm-passes -o - %s \ |
| 22 | // RUN: | FileCheck %s -check-prefix CHECK-IR |
| 23 | // |
Artem Belevich | 5d40ae3 | 2015-10-27 17:56:59 +0000 | [diff] [blame] | 24 | // Make sure we can link two bitcode files. |
| 25 | // RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \ |
Matt Arsenault | a13746b | 2018-08-20 18:16:48 +0000 | [diff] [blame] | 26 | // RUN: -mlink-builtin-bitcode %t.bc -mlink-builtin-bitcode %t-2.bc \ |
Artem Belevich | 5d40ae3 | 2015-10-27 17:56:59 +0000 | [diff] [blame] | 27 | // RUN: -emit-llvm -disable-llvm-passes -o - %s \ |
| 28 | // RUN: | FileCheck %s -check-prefix CHECK-IR -check-prefix CHECK-IR-2 |
| 29 | // |
Artem Belevich | 7cb25c9 | 2015-09-10 18:24:23 +0000 | [diff] [blame] | 30 | // Make sure function in device-code gets linked but is not internalized |
| 31 | // without -fcuda-uses-libdevice |
| 32 | // RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \ |
| 33 | // RUN: -mlink-bitcode-file %t.bc -emit-llvm \ |
| 34 | // RUN: -disable-llvm-passes -o - %s \ |
| 35 | // RUN: | FileCheck %s -check-prefix CHECK-IR-NLD |
| 36 | // |
| 37 | // Make sure NVVMReflect pass is enabled in NVPTX back-end. |
| 38 | // RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \ |
Matt Arsenault | a13746b | 2018-08-20 18:16:48 +0000 | [diff] [blame] | 39 | // RUN: -mlink-builtin-bitcode %t.bc -S -o /dev/null %s \ |
Eli Friedman | 01d349b | 2018-04-12 22:21:36 +0000 | [diff] [blame] | 40 | // RUN: -mllvm -debug-pass=Structure 2>&1 \ |
Artem Belevich | 7cb25c9 | 2015-09-10 18:24:23 +0000 | [diff] [blame] | 41 | // RUN: | FileCheck %s -check-prefix CHECK-REFLECT |
| 42 | |
| 43 | #include "Inputs/cuda.h" |
| 44 | |
| 45 | __device__ float device_mul_or_add(float a, float b); |
| 46 | extern "C" __device__ double __nv_sin(double x); |
| 47 | extern "C" __device__ double __nv_exp(double x); |
| 48 | |
| 49 | // CHECK-IR-LABEL: define void @_Z26should_not_be_internalizedPf( |
| 50 | // CHECK-PTX-LABEL: .visible .func _Z26should_not_be_internalizedPf( |
| 51 | __device__ void should_not_be_internalized(float *data) {} |
| 52 | |
| 53 | // Make sure kernel call has not been internalized. |
| 54 | // CHECK-IR-LABEL: define void @_Z6kernelPfS_ |
| 55 | // CHECK-PTX-LABEL: .visible .entry _Z6kernelPfS_( |
| 56 | __global__ __attribute__((used)) void kernel(float *out, float *in) { |
| 57 | *out = device_mul_or_add(in[0], in[1]); |
| 58 | *out += __nv_exp(__nv_sin(*out)); |
| 59 | should_not_be_internalized(out); |
| 60 | } |
| 61 | |
| 62 | // Make sure device_mul_or_add() is present in IR, is internal and |
| 63 | // calls __nvvm_reflect(). |
| 64 | // CHECK-IR-LABEL: define internal float @_Z17device_mul_or_addff( |
| 65 | // CHECK-IR-NLD-LABEL: define float @_Z17device_mul_or_addff( |
| 66 | // CHECK-IR: call i32 @__nvvm_reflect |
| 67 | // CHECK-IR: ret float |
| 68 | |
Artem Belevich | 5d40ae3 | 2015-10-27 17:56:59 +0000 | [diff] [blame] | 69 | // Make sure we've linked in and internalized only needed functions |
| 70 | // from the second bitcode file. |
| 71 | // CHECK-IR-2-LABEL: define internal double @__nv_sin |
| 72 | // CHECK-IR-2-LABEL: define internal double @__nv_exp |
| 73 | // CHECK-IR-2-NOT: double @__unused |
| 74 | |
Artem Belevich | 7cb25c9 | 2015-09-10 18:24:23 +0000 | [diff] [blame] | 75 | // Verify that NVVMReflect pass is among the passes run by NVPTX back-end. |
| 76 | // CHECK-REFLECT: Replace occurrences of __nvvm_reflect() calls with 0/1 |