Blame - clang/test/CodeGenCUDA/link-device-bitcode.cu - toolchain/llvm-project

blob: 45e5bcff995f4a5a0355c045c63c163bfce356c1 [file] [log] [blame]

Artem Belevich	7cb25c9	2015-09-10 18:24:23 +0000	[diff] [blame]	1	// Test for linking with CUDA's libdevice as outlined in
				2	// http://llvm.org/docs/NVPTXUsage.html#linking-with-libdevice
				3	//
				4	// REQUIRES: nvptx-registered-target
				5	//
				6	// Prepare bitcode file to link with
				7	// RUN: %clang_cc1 -triple nvptx-unknown-cuda -emit-llvm-bc -o %t.bc \
				8	// RUN: %S/Inputs/device-code.ll
				9	//
				10	// Make sure function in device-code gets linked in and internalized.
				11	// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
				12	// RUN: -mlink-bitcode-file %t.bc -fcuda-uses-libdevice -emit-llvm \
				13	// RUN: -disable-llvm-passes -o - %s \
				14	// RUN: | FileCheck %s -check-prefix CHECK-IR
				15	//
				16	// Make sure function in device-code gets linked but is not internalized
				17	// without -fcuda-uses-libdevice
				18	// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
				19	// RUN: -mlink-bitcode-file %t.bc -emit-llvm \
				20	// RUN: -disable-llvm-passes -o - %s \
				21	// RUN: | FileCheck %s -check-prefix CHECK-IR-NLD
				22	//
				23	// Make sure NVVMReflect pass is enabled in NVPTX back-end.
				24	// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
				25	// RUN: -mlink-bitcode-file %t.bc -fcuda-uses-libdevice -S -o /dev/null %s \
				26	// RUN: -backend-option -debug-pass=Structure 2>&1 \
				27	// RUN: | FileCheck %s -check-prefix CHECK-REFLECT
				28
				29	#include "Inputs/cuda.h"
				30
				31	__device__ float device_mul_or_add(float a, float b);
				32	extern "C" __device__ double __nv_sin(double x);
				33	extern "C" __device__ double __nv_exp(double x);
				34
				35	// CHECK-IR-LABEL: define void @_Z26should_not_be_internalizedPf(
				36	// CHECK-PTX-LABEL: .visible .func _Z26should_not_be_internalizedPf(
				37	__device__ void should_not_be_internalized(float *data) {} // Empty body; the label checks directly above match this function's definition line.
				38
				39	// Make sure kernel call has not been internalized.
				40	// CHECK-IR-LABEL: define void @_Z6kernelPfS_
				41	// CHECK-PTX-LABEL: .visible .entry _Z6kernelPfS_(
				42	__global__ __attribute__((used)) void kernel(float *out, float *in) { // pointer params, as the mangled name _Z6kernelPfS_ requires
				43	  *out = device_mul_or_add(in[0], in[1]); // device_mul_or_add is only declared in this file
				44	  *out += __nv_exp(__nv_sin(*out)); // calls the extern "C" __nv_* functions declared above
				45	  should_not_be_internalized(out); // reference the locally defined device function
				46	}
				47
				48	// Make sure device_mul_or_add() is present in IR, is internal and
				49	// calls __nvvm_reflect().
				50	// CHECK-IR-LABEL: define internal float @_Z17device_mul_or_addff(
				51	// CHECK-IR-NLD-LABEL: define float @_Z17device_mul_or_addff(
				52	// CHECK-IR: call i32 @__nvvm_reflect
				53	// CHECK-IR: ret float
				54
				55	// Verify that NVVMReflect pass is among the passes run by NVPTX back-end.
				56	// CHECK-REFLECT: Replace occurrences of __nvvm_reflect() calls with 0/1